Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 125 additions & 3 deletions crates/erg-vanity-gpu/kernels/secp256k1_point.cl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,93 @@ __constant uchar GY_BYTES[32] = {
(uchar)0x9C, (uchar)0x47, (uchar)0xD0, (uchar)0x8F, (uchar)0xFB, (uchar)0x10, (uchar)0xD4, (uchar)0xB8
};

// Precomputed table: G_TABLE[i] = i * G for i = 0..15 (Jacobian coords)
// Indexed by one 4-bit scalar nibble in pt_mul_generator (G_TABLE[nibble][j]).
// Each point = 24 uints (8 for X, 8 for Y, 8 for Z)
// Layout: [X[0..7], Y[0..7], Z[0..7]] with limb 0 = LSB
// Entry 0 encodes the point at infinity as X = 1, Y = 1, Z = 0.
// Entries 1..15 are stored with Z = 1, so their X/Y limbs are the affine coordinates.
// The X/Y limbs of entry 1 are the generator itself (GX / GY).
// Generated by: cargo run --bin gen_g_table
// Regenerate with that tool rather than editing limbs by hand.
__constant uint G_TABLE[16][24] = {
{ // 0*G (infinity)
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u,
0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 1*G
0x16F81798u, 0x59F2815Bu, 0x2DCE28D9u, 0x029BFCDBu, 0xCE870B07u, 0x55A06295u, 0xF9DCBBACu, 0x79BE667Eu,
0xFB10D4B8u, 0x9C47D08Fu, 0xA6855419u, 0xFD17B448u, 0x0E1108A8u, 0x5DA4FBFCu, 0x26A3C465u, 0x483ADA77u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 2*G
0x5C709EE5u, 0xABAC09B9u, 0x8CEF3CA7u, 0x5C778E4Bu, 0x95C07CD8u, 0x3045406Eu, 0x41ED7D6Du, 0xC6047F94u,
0x50CFE52Au, 0x236431A9u, 0x3266D0E1u, 0xF7F63265u, 0x466CEAEEu, 0xA3C58419u, 0xA63DC339u, 0x1AE168FEu,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 3*G
0xBCE036F9u, 0x8601F113u, 0x836F99B0u, 0xB531C845u, 0xF89D5229u, 0x49344F85u, 0x9258C310u, 0xF9308A01u,
0x84B8E672u, 0x6CB9FD75u, 0x34C2231Bu, 0x6500A999u, 0x2A37F356u, 0x0FE337E6u, 0x632DE814u, 0x388F7B0Fu,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 4*G
0xE8C4CD13u, 0x74FA94ABu, 0x0EE07584u, 0xCC6C1390u, 0x930B1404u, 0x581E4904u, 0xC10D80F3u, 0xE493DBF1u,
0x47739922u, 0xCFE97BDCu, 0xBFBDFE40u, 0xD967AE33u, 0x8EA51448u, 0x5642E209u, 0xA0D455B7u, 0x51ED993Eu,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 5*G
0xB240EFE4u, 0xCBA8D569u, 0xDC619AB7u, 0xE88B84BDu, 0x0A5C5128u, 0x55B4A725u, 0x1A072093u, 0x2F8BDE4Du,
0xA6AC62D6u, 0xDCA87D3Au, 0xAB0D6840u, 0xF788271Bu, 0xA6C9C426u, 0xD4DBA9DDu, 0x36E5E3D6u, 0xD8AC2226u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 6*G
0x60297556u, 0x2F057A14u, 0x8568A18Bu, 0x82F6472Fu, 0x355235D3u, 0x20453A14u, 0x755EEEA4u, 0xFFF97BD5u,
0xB075F297u, 0x3C870C36u, 0x518FE4A0u, 0xDE80F0F6u, 0x7F45C560u, 0xF3BE9601u, 0xACFBB620u, 0xAE12777Au,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 7*G
0xCAC4F9BCu, 0xE92BDDEDu, 0x0330E39Cu, 0x3D419B7Eu, 0xF2EA7A0Eu, 0xA398F365u, 0x6E5DB4EAu, 0x5CBDF064u,
0x087264DAu, 0xA5082628u, 0x13FDE7B5u, 0xA813D0B8u, 0x861A54DBu, 0xA3178D6Du, 0xBA255960u, 0x6AEBCA40u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 8*G
0xE10A2A01u, 0x67784EF3u, 0xE5AF888Au, 0x0A1BDD05u, 0xB70F3C2Fu, 0xAFF3843Fu, 0x5CCA351Du, 0x2F01E5E1u,
0x6CBDE904u, 0xB5DA2CB7u, 0xBA5B7617u, 0xC2E213D6u, 0x132D13B4u, 0x293D082Au, 0x41539949u, 0x5C4DA8A7u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 9*G
0xFC27CCBEu, 0xC35F110Du, 0x4C57E714u, 0xE0979697u, 0x9F559ABDu, 0x09AD178Au, 0xF0C7F653u, 0xACD484E2u,
0xC64F9C37u, 0x05CC262Au, 0x375F8E0Fu, 0xADD888A4u, 0x763B61E9u, 0x64380971u, 0xB0A7D9FDu, 0xCC338921u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 10*G
0x47E247C7u, 0x52A68E2Au, 0x1943C2B7u, 0x3442D49Bu, 0x1AE6AE5Du, 0x35477C7Bu, 0x47F3C862u, 0xA0434D9Eu,
0x037368D7u, 0x3CBEE53Bu, 0xD877A159u, 0x6F794C2Eu, 0x93A24C69u, 0xA3B6C7E6u, 0x5419BC27u, 0x893ABA42u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 11*G
0x5DA008CBu, 0xBBEC1789u, 0xE5C17891u, 0x5649980Bu, 0x70C65AACu, 0x5EF4246Bu, 0x58A9411Eu, 0x774AE7F8u,
0xC953C61Bu, 0x301D74C9u, 0xDFF9D6A8u, 0x372DB1E2u, 0xD7B7B365u, 0x0243DD56u, 0xEB6B5E19u, 0xD984A032u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 12*G
0x70AFE85Au, 0xC5B0F470u, 0x9620095Bu, 0x687CF441u, 0x4D734633u, 0x15C38F00u, 0x48E7561Bu, 0xD01115D5u,
0xF4062327u, 0x6B051B13u, 0xD9A86D52u, 0x79238C5Du, 0xE17BD815u, 0xA8B64537u, 0xC815E0D7u, 0xA9F34FFDu,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 13*G
0x19405AA8u, 0xDEEDDF8Fu, 0x610E58CDu, 0xB075FBC6u, 0xC3748651u, 0xC7D1D205u, 0xD975288Bu, 0xF28773C2u,
0xDB03ED81u, 0x29B5CB52u, 0x521FA91Fu, 0x3A1A06DAu, 0x65CDAF47u, 0x758212EBu, 0x8D880A89u, 0x0AB0902Eu,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 14*G
0x60E823E4u, 0xE49B241Au, 0x678949E6u, 0x26AA7B63u, 0x07D38E32u, 0xFD64E67Fu, 0x895E719Cu, 0x499FDF9Eu,
0x03A13F5Bu, 0xC65F40D4u, 0x7A3F95BCu, 0x464279C2u, 0xA7B3D464u, 0x90F044E4u, 0xB54E8551u, 0xCAC2F6C4u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
},
{ // 15*G
0xE27E080Eu, 0x44ADBCF8u, 0x3C85F79Eu, 0x31E5946Fu, 0x095FF411u, 0x5A465AE3u, 0x7D43EA96u, 0xD7924D4Fu,
0xF6A26B58u, 0xC504DC9Fu, 0xD896D3A5u, 0xEA40AF2Bu, 0x28CC6DEFu, 0x83842EC2u, 0xA86C72A6u, 0x581E2872u,
0x00000001u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u, 0x00000000u
}
};

// Point structure: 3 field elements (X, Y, Z) in Jacobian coordinates
// We pass points as arrays of 24 uints (3 * 8 limbs)
// Layout: [X[0..7], Y[0..7], Z[0..7]]
Expand Down Expand Up @@ -215,10 +302,45 @@ inline void pt_mul(__private uint* r, __private const uint* k, __private const u
}

// Multiply generator G by scalar k: r = k * G
//
// Uses 4-bit fixed-window multiplication with the precomputed G_TABLE,
// consuming the scalar most-significant nibble first. Every nibble executes
// the same sequence (4 doublings, then one table addition), so the loop body
// contains no data-dependent branch.
//
// Correctness of the uniform body relies on the point helpers treating
// infinity as the identity: pt_double(inf) = inf for the leading doublings,
// and pt_add with infinity on either side returning the other operand
// (a zero nibble adds G_TABLE[0], which is the infinity entry).
//
//   r: output point, 24 uints laid out as [X[0..7], Y[0..7], Z[0..7]]
//   k: scalar limbs; serialized via sc_to_bytes into 32 bytes that are
//      consumed from index 0 (most significant) to index 31
inline void pt_mul_generator(__private uint* r, __private const uint* k) {
    uchar k_bytes[32];
    sc_to_bytes(k_bytes, k);

    // Ping-pong buffers: each step writes into tmp and then swaps the two
    // pointers, so only the final result needs to be copied.
    // NOTE(review): presumably pt_double/pt_add must not be called with the
    // output aliasing an input - confirm against their definitions.
    uint buf0[24], buf1[24], selected[24];
    __private uint* acc = buf0;
    __private uint* tmp = buf1;
    pt_infinity(acc);

    // Process 32 bytes MSB-first; within each byte shift = 4 selects the high
    // nibble (bits 4-7) and shift = 0 selects the low nibble (bits 0-3).
    for (int byte_idx = 0; byte_idx < 32; byte_idx++) {
        uchar b = k_bytes[byte_idx];

        for (int shift = 4; shift >= 0; shift -= 4) {
            // acc = 16 * acc
            for (int i = 0; i < 4; i++) {
                pt_double(tmp, acc);
                __private uint* t = acc; acc = tmp; tmp = t;
            }

            // Stage the table entry in private memory before handing it to pt_add.
            uint nibble = ((uint)b >> shift) & 0xFu;
            for (int j = 0; j < 24; j++) selected[j] = G_TABLE[nibble][j];

            // acc = acc + nibble * G
            pt_add(tmp, acc, selected);
            { __private uint* t = acc; acc = tmp; tmp = t; }
        }
    }

    // Copy final result (only copy needed)
    pt_copy(r, acc);
}

// Convert to affine coordinates
Expand Down
138 changes: 138 additions & 0 deletions crates/erg-vanity-gpu/src/bin/gen_g_table.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
//! Generate G_TABLE for windowed scalar multiplication
//!
//! Run with: cargo run --bin gen_g_table

use erg_vanity_crypto::secp256k1::point::Point;
use erg_vanity_crypto::secp256k1::scalar::Scalar;

/// Split a 32-byte big-endian value into eight 32-bit limbs, least-significant limb first.
///
/// This is the limb order the kernel table uses (the kernel's
/// `fe_from_constant_bytes` is described as doing the same mapping):
/// limb 0 is built from bytes 28..=31 and limb 7 from bytes 0..=3, with each
/// 4-byte group itself read as a big-endian `u32`.
fn bytes_to_limbs(bytes: &[u8; 32]) -> [u32; 8] {
    let mut limbs = [0u32; 8];
    // Pair limb 0 with the last 4-byte word, limb 1 with the one before it, and so on.
    for (limb, word) in limbs.iter_mut().zip(bytes.chunks_exact(4).rev()) {
        *limb = u32::from_be_bytes([word[0], word[1], word[2], word[3]]);
    }
    limbs
}

/// Print the OpenCL `G_TABLE` definition (0*G .. 15*G) to stdout.
///
/// Entry 0 is written as the infinity encoding X = 1, Y = 1, Z = 0; entries
/// 1..=15 are computed with `Point::mul_generator`, converted to affine and
/// written with Z = 1. The limbs of 1*G are asserted against the expected
/// kernel GX/GY constants, and a confirmation line goes to stderr.
fn main() {
    /// Render eight limbs as `0x........u` literals separated by ", " (no trailing separator).
    fn limb_row(limbs: &[u32; 8]) -> String {
        limbs
            .iter()
            .map(|limb| format!("{:#010X}u", limb))
            .collect::<Vec<_>>()
            .join(", ")
    }

    // Expected kernel constants for verification
    const EXPECTED_GX: [u32; 8] = [
        0x16F81798, 0x59F2815B, 0x2DCE28D9, 0x029BFCDB, 0xCE870B07, 0x55A06295, 0xF9DCBBAC,
        0x79BE667E,
    ];
    const EXPECTED_GY: [u32; 8] = [
        0xFB10D4B8, 0x9C47D08F, 0xA6855419, 0xFD17B448, 0x0E1108A8, 0x5DA4FBFC, 0x26A3C465,
        0x483ADA77,
    ];

    // Field-element encodings of 1 and 0 (limb 0 = least significant).
    const FE_ONE: [u32; 8] = [1, 0, 0, 0, 0, 0, 0, 0];
    const FE_ZERO: [u32; 8] = [0; 8];

    // Table header, including the constants a reader can cross-check by eye.
    println!("// Precomputed table: G_TABLE[i] = i * G for i = 0..15 (Jacobian coords)");
    println!("// Each point = 24 uints (8 for X, 8 for Y, 8 for Z)");
    println!("// Layout: [X[0..7], Y[0..7], Z[0..7]] with limb 0 = LSB");
    println!("// Generated by: cargo run --bin gen_g_table");
    println!("//");
    println!("// Kernel constants for cross-check:");
    println!("// fe_one = [0x00000001, 0, 0, 0, 0, 0, 0, 0]");
    println!("// fe_zero = [0, 0, 0, 0, 0, 0, 0, 0]");
    println!(
        "// GX limbs: [{:#010X}, {:#010X}, ...]",
        EXPECTED_GX[0], EXPECTED_GX[1]
    );
    println!(
        "// GY limbs: [{:#010X}, {:#010X}, ...]",
        EXPECTED_GY[0], EXPECTED_GY[1]
    );
    println!("__constant uint G_TABLE[16][24] = {{");

    for i in 0..16u64 {
        // `preview` carries the big-endian coordinate bytes for the entry comment;
        // the infinity entry has none.
        let (x_limbs, y_limbs, z_limbs, preview) = match i {
            // Infinity: X=1, Y=1, Z=0 (matches kernel's pt_infinity)
            0 => (FE_ONE, FE_ONE, FE_ZERO, None),
            _ => {
                // Big-endian scalar whose only non-zero byte is the last one: value i.
                let mut scalar_bytes = [0u8; 32];
                scalar_bytes[31] = i as u8;

                let scalar = Scalar::from_bytes(&scalar_bytes).unwrap();
                let point = Point::mul_generator(&scalar);
                let (x, y) = point.to_affine().expect("Point should not be infinity");

                let x_bytes = x.to_bytes();
                let y_bytes = y.to_bytes();
                let x_limbs = bytes_to_limbs(&x_bytes);
                let y_limbs = bytes_to_limbs(&y_bytes);

                // Verify 1*G matches kernel constants
                if i == 1 {
                    assert_eq!(x_limbs, EXPECTED_GX, "1*G X mismatch with kernel GX!");
                    assert_eq!(y_limbs, EXPECTED_GY, "1*G Y mismatch with kernel GY!");
                    eprintln!("✓ 1*G matches kernel GX/GY constants");
                }

                // Z=1 for affine
                (x_limbs, y_limbs, FE_ONE, Some((x_bytes, y_bytes)))
            }
        };

        // Entry header, with the first and last four big-endian bytes of X and Y.
        print!(" {{ // {}*G", i);
        if let Some((xb, yb)) = &preview {
            print!(
                "\n // X: {:02X}{:02X}{:02X}{:02X}...{:02X}{:02X}{:02X}{:02X}",
                xb[0], xb[1], xb[2], xb[3], xb[28], xb[29], xb[30], xb[31]
            );
            print!(
                "\n // Y: {:02X}{:02X}{:02X}{:02X}...{:02X}{:02X}{:02X}{:02X}",
                yb[0], yb[1], yb[2], yb[3], yb[28], yb[29], yb[30], yb[31]
            );
        }
        println!();
        print!(" ");

        // X and Y rows end with a comma and line break; the Z row closes the entry.
        print!("{},\n ", limb_row(&x_limbs));
        print!("{},\n ", limb_row(&y_limbs));
        print!("{}", limb_row(&z_limbs));
        println!();

        // Every entry except the last is followed by a comma.
        if i < 15 {
            println!(" }},");
        } else {
            println!(" }}");
        }
    }

    println!("}};");
}