small performance improvements

TilmanNeumann · TilmanNeumann · commit ef1832b8b5b4 · 2025-01-19T18:40:38.000+01:00
diff --git a/src/main/java/de/tilman_neumann/jml/factor/ecm/TinyEcm64MH.java b/src/main/java/de/tilman_neumann/jml/factor/ecm/TinyEcm64MH.java
@@ -103,7 +103,7 @@ public ecm_work() {
 
 	private static final boolean DEBUG = false;
 
-	private static final int MAX_BITS_SUPPORTED = 62;
+	private static final int MAX_BITS_SUPPORTED = 62; // 63-bit numbers seem to work now but are very slow, so support for them is not completely fixed yet
 	
 	// The reducer R is 2^64, but the only constant still required is the half of it.
 	private static final long R_HALF = 1L << 63;
@@ -227,7 +227,7 @@ long u64div(long c, long n) {
 	 * @return u*v mod m
 	 */
 	long spMulMod(long u, long v, long m) {
-		return Uint128.spMul64_MH(u, v).spDivide_MH(m)[1];
+		return Uint128.mul64SignedMH(u, v).spDivide_MH(m)[1];
 	}
 
 	long spGCD(long x, long y) {
@@ -1094,15 +1094,15 @@ private long setUpMontgomeryMult_v2(long N) {
 	 */
 	public static long montMul64(long a, long b, long N, long Nhat) {
 		// Step 1: Compute a*b
-		Uint128 ab = Uint128.spMul64_MH(a, b);
+		Uint128 ab = Uint128.mul64SignedMH(a, b);
 		// Step 2: Compute t = ab * (-1/N) mod R
 		// Since R=2^64, "x mod R" just means to get the low part of x.
 		// That would give t = Uint128.mul64(ab.getLow(), minusNInvModR).getLow();
 		// but even better, the long product just gives the low part -> we can get rid of one expensive mul64().
 		long t = ab.getLow() * Nhat;
 		// Step 3: Compute r = (a*b + t*N) / R
 		// Since R=2^64, "x / R" just means to get the high part of x.
-		long r = ab.add_getHigh(Uint128.spMul64_MH(t, N));
+		long r = ab.add_getHigh(Uint128.mul64SignedMH(t, N));
 		// If the correct result is c, then now r==c or r==c+N.
 		r = r<N ? r : r-N; // required at ecm
 
diff --git a/src/main/java/de/tilman_neumann/jml/factor/ecm/TinyEcm64MHInlined.java b/src/main/java/de/tilman_neumann/jml/factor/ecm/TinyEcm64MHInlined.java
@@ -1104,7 +1104,7 @@ public static long montMul64(long a, long b, long N, long Nhat) {
 		long t = abLow * Nhat;
 		final long tNLow = t*N;
 		long tNHigh = Math.multiplyHigh(t, N);
-		if (t<0) tNHigh += N;
+		//if (t<0) tNHigh += N; // bad for performance
 
 		// Step 3: Compute r = (a*b + t*N) / R
 		// Since R=2^64, "x / R" just means to get the high part of x.
diff --git a/src/main/java/de/tilman_neumann/jml/factor/pollardRho/PollardRhoBrentMontgomery64MHInlined.java b/src/main/java/de/tilman_neumann/jml/factor/pollardRho/PollardRhoBrentMontgomery64MHInlined.java
@@ -18,7 +18,6 @@
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.LogManager;
 
-import de.tilman_neumann.jml.base.Uint128;
 import de.tilman_neumann.jml.factor.FactorAlgorithm;
 import de.tilman_neumann.jml.gcd.Gcd63;
 import de.tilman_neumann.jml.random.Rng;
@@ -180,12 +179,11 @@ public static long montMul64(long a, long b, long N, long Nhat) {
 		final long tNLow = t*N;
 		long tNHigh = Math.multiplyHigh(t, N);
 		//if (t<0) tNHigh += N; // bad for performance
-		Uint128 tN = new Uint128(tNHigh, tNLow); // for some reason, removing this object seems to degrade performance
 		
 		// Step 3: Compute r = (a*b + t*N) / R
 		// Since R=2^64, "x / R" just means to get the high part of x.
-		long low = abLow + tN.getLow();
-		long high = abHigh + tN.getHigh();
+		long low = abLow + tNLow;
+		long high = abHigh + tNHigh;
 		long r = (low+Long.MIN_VALUE < abLow+Long.MIN_VALUE) ? high + 1 : high;
 		// If the correct result is c, then now r==c or r==c+N.
 		// This is fine for this factoring algorithm, because r will