Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions compiler/rustc_codegen_llvm/src/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -396,6 +396,7 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
| sym::bitreverse
| sym::saturating_add
| sym::saturating_sub
| sym::carryless_mul
| sym::unchecked_funnel_shl
| sym::unchecked_funnel_shr => {
let ty = args[0].layout.ty;
Expand Down Expand Up @@ -438,6 +439,11 @@ impl<'ll, 'tcx> IntrinsicCallBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
sym::bitreverse => {
self.call_intrinsic("llvm.bitreverse", &[llty], &[args[0].immediate()])
}
sym::carryless_mul if crate::llvm_util::get_version() >= (22, 0, 0) => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since the fallback body is non-trivial, maybe this should be added to the list from #150605 ?

let lhs = args[0].immediate();
let rhs = args[1].immediate();
self.call_intrinsic("llvm.clmul", &[llty], &[lhs, rhs])
}
sym::unchecked_funnel_shl | sym::unchecked_funnel_shr => {
let is_left = name == sym::unchecked_funnel_shl;
let lhs = args[0].immediate();
Expand Down Expand Up @@ -2763,6 +2769,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
| sym::simd_ctlz
| sym::simd_ctpop
| sym::simd_cttz
| sym::simd_carryless_mul
| sym::simd_funnel_shl
| sym::simd_funnel_shr
) {
Expand All @@ -2787,6 +2794,7 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
sym::simd_cttz => "llvm.cttz",
sym::simd_funnel_shl => "llvm.fshl",
sym::simd_funnel_shr => "llvm.fshr",
sym::simd_carryless_mul => "llvm.clmul",
_ => unreachable!(),
};
let int_size = in_elem.int_size_and_signed(bx.tcx()).0.bits();
Expand All @@ -2812,6 +2820,17 @@ fn generic_simd_intrinsic<'ll, 'tcx>(
&[vec_ty],
&[args[0].immediate(), args[1].immediate(), args[2].immediate()],
)),
sym::simd_carryless_mul => {
if crate::llvm_util::get_version() >= (22, 0, 0) {
Ok(bx.call_intrinsic(
llvm_intrinsic,
&[vec_ty],
&[args[0].immediate(), args[1].immediate()],
))
} else {
span_bug!(span, "`simd_carryless_mul` needs LLVM 22 or higher");
}
}
_ => unreachable!(),
};
}
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_hir_analysis/src/check/intrinsic.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ fn intrinsic_operation_unsafety(tcx: TyCtxt<'_>, intrinsic_id: LocalDefId) -> hi
| sym::bswap
| sym::caller_location
| sym::carrying_mul_add
| sym::carryless_mul
| sym::ceilf16
| sym::ceilf32
| sym::ceilf64
Expand Down Expand Up @@ -564,6 +565,7 @@ pub(crate) fn check_intrinsic_type(
(1, 0, vec![param(0), param(0)], param(0))
}
sym::saturating_add | sym::saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
sym::carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
sym::fadd_fast | sym::fsub_fast | sym::fmul_fast | sym::fdiv_fast | sym::frem_fast => {
(1, 0, vec![param(0), param(0)], param(0))
}
Expand Down Expand Up @@ -711,7 +713,8 @@ pub(crate) fn check_intrinsic_type(
| sym::simd_fmin
| sym::simd_fmax
| sym::simd_saturating_add
| sym::simd_saturating_sub => (1, 0, vec![param(0), param(0)], param(0)),
| sym::simd_saturating_sub
| sym::simd_carryless_mul => (1, 0, vec![param(0), param(0)], param(0)),
sym::simd_arith_offset => (2, 0, vec![param(0), param(1)], param(0)),
sym::simd_neg
| sym::simd_bswap
Expand Down
2 changes: 2 additions & 0 deletions compiler/rustc_span/src/symbol.rs
Original file line number Diff line number Diff line change
Expand Up @@ -642,6 +642,7 @@ symbols! {
caller_location,
capture_disjoint_fields,
carrying_mul_add,
carryless_mul,
catch_unwind,
cause,
cdylib,
Expand Down Expand Up @@ -2083,6 +2084,7 @@ symbols! {
simd_bitmask,
simd_bitreverse,
simd_bswap,
simd_carryless_mul,
simd_cast,
simd_cast_ptr,
simd_ceil,
Expand Down
54 changes: 54 additions & 0 deletions library/core/src/intrinsics/fallback.rs
Original file line number Diff line number Diff line change
Expand Up @@ -218,3 +218,57 @@ macro_rules! impl_funnel_shifts {
impl_funnel_shifts! {
u8, u16, u32, u64, u128, usize
}

/// Per-type fallback used to implement the generic `carryless_mul` intrinsic.
#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
pub const trait CarrylessMul: Copy + 'static {
/// See [`super::carryless_mul`]; we just need the trait indirection to handle
/// different types since calling intrinsics with generics doesn't work.
///
/// Takes both operands by value and returns a value of the same type.
fn carryless_mul(self, rhs: Self) -> Self;
}

macro_rules! impl_carryless_mul{
($($type:ident),*) => {$(
/// This approach uses a bitmask of the form `0b100010001...0001` to avoid carry spilling.
/// When carries do occur, they wind up in a "hole" of zeros and are subsequently masked
/// out of the result.
Comment on lines +231 to +233
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This approach with 4-bit digits works up to integers with 4 * 15 = 60 bits. Past that, one digit can overflow to the next.

For u64, it does actually work for this "non-widening" operation, since the top digit may be computed as 16, but there is no next digit that would be affected. The wide result would be erroneous however. E.g. x.carryless_mul(x) with x = MASK as u64 as u128.

The impl for u128::carryless_mul is currently incorrect for that reason. You could probably extend the approach to use 5-bit digits, but it's likely better to just implement it in terms of u64::carryless_mul.

some tests against a naive impl: playground

#[rustc_const_unstable(feature = "core_intrinsics_fallbacks", issue = "none")]
impl const CarrylessMul for $type {
#[inline]
fn carryless_mul(self, rhs: Self) -> Self {
use crate::num::Wrapping;

// i.e. 0b100010001...0001 in binary.
const MASK: u128 = 0x1111_1111_1111_1111_1111_1111_1111_1111;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
const MASK: u128 = 0x1111_1111_1111_1111_1111_1111_1111_1111;
const MASK: u128 = !0 / 0xF;

seems easier to read

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it? When I look at that expression I'd have to trust the comment that it does what it says. The 0x1111 literal is long but to me it's clearer.


let m0 = MASK as $type;
let x = self;
let y = rhs;

let m1 = m0 << 1;
let m2 = m1 << 1;
let m3 = m2 << 1;

let x0 = Wrapping(x & m0);
let x1 = Wrapping(x & m1);
let x2 = Wrapping(x & m2);
let x3 = Wrapping(x & m3);

let y0 = Wrapping(y & m0);
let y1 = Wrapping(y & m1);
let y2 = Wrapping(y & m2);
let y3 = Wrapping(y & m3);

let z0 = (x0 * y0) ^ (x1 * y3) ^ (x2 * y2) ^ (x3 * y1);
let z1 = (x0 * y1) ^ (x1 * y0) ^ (x2 * y3) ^ (x3 * y2);
let z2 = (x0 * y2) ^ (x1 * y1) ^ (x2 * y0) ^ (x3 * y3);
let z3 = (x0 * y3) ^ (x1 * y2) ^ (x2 * y1) ^ (x3 * y0);

(z0.0 & m0) | (z1.0 & m1) | (z2.0 & m2) | (z3.0 & m3)
}
}
)*};
}

impl_carryless_mul! {
u8, u16, u32, u64, u128, usize
}
13 changes: 13 additions & 0 deletions library/core/src/intrinsics/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2179,6 +2179,19 @@ pub const unsafe fn unchecked_funnel_shr<T: [const] fallback::FunnelShift>(
unsafe { a.unchecked_funnel_shr(b, shift) }
}

/// Carryless multiply.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this meant to be a self-contained description? To me these words mean nothing.^^

///
/// Multiplies `a` and `b` like binary long multiplication, except that the shifted
/// partial products are combined with XOR instead of addition, so no carry propagates
/// between bit positions. The result wraps: only the low bits that fit in `T` are kept.
///
/// Safe versions of this intrinsic are available on the integer primitives
/// via the `carryless_mul` method. For example, [`u32::carryless_mul`].
#[rustc_intrinsic]
#[rustc_nounwind]
#[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
#[unstable(feature = "uint_carryless_mul", issue = "152080")]
#[miri::intrinsic_fallback_is_spec]
pub const fn carryless_mul<T: [const] fallback::CarrylessMul>(a: T, b: T) -> T {
    // Fallback body: dispatch to the per-type implementation behind the trait.
    <T as fallback::CarrylessMul>::carryless_mul(a, b)
}

/// This is an implementation detail of [`crate::ptr::read`] and should
/// not be used anywhere else. See its comments for why this exists.
///
Expand Down
12 changes: 12 additions & 0 deletions library/core/src/intrinsics/simd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,18 @@ pub const unsafe fn simd_funnel_shl<T>(a: T, b: T, shift: T) -> T;
#[rustc_nounwind]
pub const unsafe fn simd_funnel_shr<T>(a: T, b: T, shift: T) -> T;

/// Compute the carry-less product.
///
/// This is similar to long multiplication except that the carry is discarded.
///
/// This operation can be used to model multiplication in `GF(2)[X]`, the polynomial
/// ring over `GF(2)`.
///
/// `T` must be a vector of integers.
///
/// This declaration has no fallback body. The LLVM backend lowers it to `llvm.clmul`,
/// which needs LLVM 22 or higher; with an older LLVM, code generation for this
/// intrinsic is reported as a compiler bug.
#[rustc_intrinsic]
#[rustc_nounwind]
pub unsafe fn simd_carryless_mul<T>(a: T, b: T) -> T;

/// "And"s vectors elementwise.
///
/// `T` must be a vector of integers.
Expand Down
1 change: 1 addition & 0 deletions library/core/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,7 @@
#![feature(trait_alias)]
#![feature(transparent_unions)]
#![feature(try_blocks)]
#![feature(uint_carryless_mul)]
#![feature(unboxed_closures)]
#![feature(unsized_fn_params)]
#![feature(with_negative_coherence)]
Expand Down
30 changes: 27 additions & 3 deletions library/core/src/num/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -458,6 +458,9 @@ impl u8 {
fsh_op = "0x36",
fshl_result = "0x8",
fshr_result = "0x8d",
clmul_lhs = "0x12",
clmul_rhs = "0x34",
clmul_result = "0x28",
swap_op = "0x12",
swapped = "0x12",
reversed = "0x48",
Expand Down Expand Up @@ -1095,6 +1098,9 @@ impl u16 {
fsh_op = "0x2de",
fshl_result = "0x30",
fshr_result = "0x302d",
clmul_lhs = "0x9012",
clmul_rhs = "0xcd34",
clmul_result = "0x928",
swap_op = "0x1234",
swapped = "0x3412",
reversed = "0x2c48",
Expand Down Expand Up @@ -1145,6 +1151,9 @@ impl u32 {
fsh_op = "0x2fe78e45",
fshl_result = "0xb32f",
fshr_result = "0xb32fe78e",
clmul_lhs = "0x56789012",
clmul_rhs = "0xf52ecd34",
clmul_result = "0x9b980928",
swap_op = "0x12345678",
swapped = "0x78563412",
reversed = "0x1e6a2c48",
Expand All @@ -1171,6 +1180,9 @@ impl u64 {
fsh_op = "0x2fe78e45983acd98",
fshl_result = "0x6e12fe",
fshr_result = "0x6e12fe78e45983ac",
clmul_lhs = "0x7890123456789012",
clmul_rhs = "0xdd358416f52ecd34",
clmul_result = "0xa6299579b980928",
swap_op = "0x1234567890123456",
swapped = "0x5634129078563412",
reversed = "0x6a2c48091e6a2c48",
Expand All @@ -1197,6 +1209,9 @@ impl u128 {
fsh_op = "0x2fe78e45983acd98039000008736273",
fshl_result = "0x4f7602fe",
fshr_result = "0x4f7602fe78e45983acd9803900000873",
clmul_lhs = "0x12345678901234567890123456789012",
clmul_rhs = "0x4317e40ab4ddcf05dd358416f52ecd34",
clmul_result = "0xb9cf660de35d0c170a6299579b980928",
swap_op = "0x12345678901234567890123456789012",
swapped = "0x12907856341290785634129078563412",
reversed = "0x48091e6a2c48091e6a2c48091e6a2c48",
Expand All @@ -1223,9 +1238,12 @@ impl usize {
rot = 4,
rot_op = "0xa003",
rot_result = "0x3a",
fsh_op = "0x2fe78e45983acd98039000008736273",
fshl_result = "0x4f7602fe",
fshr_result = "0x4f7602fe78e45983acd9803900000873",
fsh_op = "0x2de",
fshl_result = "0x30",
fshr_result = "0x302d",
clmul_lhs = "0x9012",
clmul_rhs = "0xcd34",
clmul_result = "0x928",
swap_op = "0x1234",
swapped = "0x3412",
reversed = "0x2c48",
Expand Down Expand Up @@ -1253,6 +1271,9 @@ impl usize {
fsh_op = "0x2fe78e45",
fshl_result = "0xb32f",
fshr_result = "0xb32fe78e",
clmul_lhs = "0x56789012",
clmul_rhs = "0xf52ecd34",
clmul_result = "0x9b980928",
swap_op = "0x12345678",
swapped = "0x78563412",
reversed = "0x1e6a2c48",
Expand Down Expand Up @@ -1280,6 +1301,9 @@ impl usize {
fsh_op = "0x2fe78e45983acd98",
fshl_result = "0x6e12fe",
fshr_result = "0x6e12fe78e45983ac",
clmul_lhs = "0x7890123456789012",
clmul_rhs = "0xdd358416f52ecd34",
clmul_result = "0xa6299579b980928",
swap_op = "0x1234567890123456",
swapped = "0x5634129078563412",
reversed = "0x6a2c48091e6a2c48",
Expand Down
29 changes: 29 additions & 0 deletions library/core/src/num/uint_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ macro_rules! uint_impl {
fsh_op = $fsh_op:literal,
fshl_result = $fshl_result:literal,
fshr_result = $fshr_result:literal,
// Operands of the `carryless_mul` doc example; each key binds the metavariable of the
// same name so the example prints them in the order they are given.
clmul_lhs = $clmul_lhs:literal,
clmul_rhs = $clmul_rhs:literal,
clmul_result = $clmul_result:literal,
swap_op = $swap_op:literal,
swapped = $swapped:literal,
reversed = $reversed:literal,
Expand Down Expand Up @@ -482,6 +485,32 @@ macro_rules! uint_impl {
unsafe { intrinsics::unchecked_funnel_shr(self, rhs, n) }
}

/// Performs a carry-less multiplication.
///
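/// This is similar to binary long multiplication, except that the shifted partial
/// products are combined with XOR instead of addition, so the carries between bit
/// positions are discarded. For example, the carry-less product of `0b11` and `0b11`
/// is `0b101`, whereas the ordinary product is `0b1001`.
/// This function wraps, so only the low bits are returned.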
///
/// This operation can be used to model multiplication in `GF(2)[X]`, the polynomial
/// ring over `GF(2)`.
///
/// ```
/// #![feature(uint_carryless_mul)]
///
#[doc = concat!("let a = ", $clmul_lhs, stringify!($SelfT), ";")]
#[doc = concat!("let b = ", $clmul_rhs, stringify!($SelfT), ";")]
///
#[doc = concat!("assert_eq!(a.carryless_mul(b), ", $clmul_result, ");")]
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only example here uses huge numbers which means the example is entirely useless if one doesn't already know what this operation does. I for one still have no clue after reading all this.^^

Which carry is discarded? All the ones that show up when adding up the results of the first steps of long multiplication? Or also all the carries that occur within the long multiplication steps? This needs way more detail. Doesn't that mean the result depends on the base? No base is mentioned in the first 2 paragraphs, and even in the third paragraph it's only mentioned in an obscure way.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe describe it as equivalent to:

impl uN {
    pub fn carryless_mul(self, other: Self) -> Self {
        let mut retval = 0;
        for i in 0..Self::BITS {
            if (other >> i) & 1 != 0 {
                retval ^= self << i;
            }
        }
        retval
    }
}

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a good specification. I haven't really found/been able to come up with a good intuitive explanation, and even if you understand the mechanics, how/when to apply this function is still hard to see.


The only example here uses huge numbers

So in part that is to test that the implementation doesn't just mask off the upper bits. Because of how these functions are generated with a macro, getting examples to work with type inference and without overflowing literals is a bit finicky.

I'll try again to come up with something more insightful.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regular long multiplication would be using += instead of ^=, right?

I have no idea why that would be called "carryless", but that does seem like a reasonable way to explain this, yeah. And maybe we should also do the const-eval/Miri implementation that way to avoid relying on the fallback impl that seems to want to be clever. We generally don't want to be clever for intrinsics in Miri.

Copy link
Contributor

@quaternic quaternic Feb 5, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Regular long multiplication would be using += instead of ^=, right?

Exactly.

It is carryless in the sense that the sum of two bits (x,y) produces a carry bit = x & y and the wrapped sum = x ^ y, so using ^ instead of + is just throwing out all the carries of bitwise addition. That is, consider the equivalence x + y = (x ^ y) + ((x & y) << 1).

The connection to polynomial multiplication is that there you naturally don't carry between the coefficients: (x + 1)(x + 1) = x^2 + 2x + 1, and the 2x doesn't just turn into x^2. Rather, if you're computing the coefficients modulo 2, you get 2x = 0x. Indeed, 0b11.carryless_mul(0b11) == 0b101, where the polynomials are represented by their valuation at x = 2.

/// ```
#[rustc_const_unstable(feature = "uint_carryless_mul", issue = "152080")]
#[doc(alias = "clmul")]
#[unstable(feature = "uint_carryless_mul", issue = "152080")]
#[must_use = "this returns the result of the operation, \
without modifying the original"]
#[inline(always)]
pub const fn carryless_mul(self, rhs: Self) -> Self {
intrinsics::carryless_mul(self, rhs)
}

/// Reverses the byte order of the integer.
///
/// # Examples
Expand Down
1 change: 1 addition & 0 deletions library/coretests/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@
#![feature(try_trait_v2)]
#![feature(type_info)]
#![feature(uint_bit_width)]
#![feature(uint_carryless_mul)]
#![feature(uint_gather_scatter_bits)]
#![feature(unsize)]
#![feature(unwrap_infallible)]
Expand Down
7 changes: 7 additions & 0 deletions library/coretests/tests/num/uint_macros.rs
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,13 @@ macro_rules! uint_module {
assert_eq_const_safe!($T: <$T>::funnel_shr(_1, _1, 4), <$T>::rotate_right(_1, 4));
}

// Checks `carryless_mul` both at run time and in const evaluation (via
// `assert_eq_const_safe!`), including inputs that use every bit of the type.
fn test_carryless_mul() {
    // Zero annihilates and one is the identity, as in ordinary multiplication.
    assert_eq_const_safe!($T: <$T>::carryless_mul(0, 0), 0);
    assert_eq_const_safe!($T: <$T>::carryless_mul(1, 1), 1);
    assert_eq_const_safe!($T: <$T>::carryless_mul(<$T>::MAX, 0), 0);
    assert_eq_const_safe!($T: <$T>::carryless_mul(<$T>::MAX, 1), <$T>::MAX);

    // Multiplying by a power of two is a shift; bits shifted past the top are dropped.
    assert_eq_const_safe!($T: <$T>::carryless_mul(0b0100, 2), 0b1000);
    assert_eq_const_safe!($T: <$T>::carryless_mul(<$T>::MAX, 2), <$T>::MAX << 1);

    // Partial products are XORed rather than added: (x + 1)^2 = x^2 + 1 over GF(2).
    assert_eq_const_safe!($T: <$T>::carryless_mul(0b11, 0b11), 0b101);

    // Bit `k` of MAX (x) MAX is the parity of `k + 1`, i.e. the pattern 0b...0101.
    // Every bit of both operands takes part, so this catches implementations that
    // let intermediate carries leak between bit positions on wide types.
    assert_eq_const_safe!($T: <$T>::carryless_mul(<$T>::MAX, <$T>::MAX), <$T>::MAX / 3);
}

fn test_swap_bytes() {
assert_eq_const_safe!($T: A.swap_bytes().swap_bytes(), A);
assert_eq_const_safe!($T: B.swap_bytes().swap_bytes(), B);
Expand Down
1 change: 1 addition & 0 deletions library/std/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@
#![feature(try_blocks)]
#![feature(try_trait_v2)]
#![feature(type_alias_impl_trait)]
#![feature(uint_carryless_mul)]
// tidy-alphabetical-end
//
// Library features (core):
Expand Down
Loading