Finished integrating new glibc PRNG code and further optimizations

On my laptop, the code with these new changes is more than 4x faster than the older code. The code was tested on ~10 different data sets and seems to work fine.
2025-07-28 00:04:38 +02:00 · 2018-01-10 16:00:54 +01:00 · 2018-01-10 16:00:54 +01:00 · 7acd739002
commit 7acd739002
parent 7738fda7d8
2 changed files with 24 additions and 25 deletions
--- a/src/pixiewps.c
+++ b/src/pixiewps.c
@ -45,14 +45,11 @@
 #include "crypto/crypto_internal-modexp.c"
 #include "crypto/hmac_sha256.c"
 #include "crypto/tc/aes_cbc.h"
+#include "random/glibc_random_yura.c"
 #include "utils.h"
 #include "wps.h"
 #include "version.h"

-#define GLIBC_MAX_GEN 4
-#include "random/glibc_random.c"
-#include "random/glibc_random_yura.c"
-
 static uint32_t ecos_rand_simplest(uint32_t *seed);
 static uint32_t ecos_rand_simple(uint32_t *seed);
 static uint32_t ecos_rand_knuth(uint32_t *seed);
@ -362,16 +359,26 @@ unsigned int hardware_concurrency()

 static void rtl_nonce_fill(uint8_t *nonce, uint32_t seed)
 {
-	struct glibc_prng glibc_prng;
-	int i;
 	uint8_t *ptr = nonce;
+	uint32_t word0 = 0, word1 = 0, word2 = 0, word3 = 0;

-	glibc_seed(&glibc_prng, seed);
+	for (int j = 0; j < 31; j++) {
+		word0 += seed * glibc_seed_tbl[j + 3];
+		word1 += seed * glibc_seed_tbl[j + 2];
+		word2 += seed * glibc_seed_tbl[j + 1];
+		word3 += seed * glibc_seed_tbl[j + 0];

-	for (i = 0; i < 4; i++, ptr += 4) {
-		uint32_t be = end_htobe32(glibc_rand(&glibc_prng));
-		memcpy(ptr, &be, sizeof be);
+		/* This does: seed = (16807LL * seed) % 0x7fffffff
+		   using the sum of digits method which works for mod N, base N+1 */
+		const uint64_t p = 16807ULL * seed; /* Seed is always positive (31 bits) */
+		seed = (p >> 31) + (p & 0x7fffffff);
 	}
+
+	uint32_t be;
+	be = end_htobe32(word0 >> 1); memcpy(ptr,      &be, sizeof be);
+	be = end_htobe32(word1 >> 1); memcpy(ptr +  4, &be, sizeof be);
+	be = end_htobe32(word2 >> 1); memcpy(ptr +  8, &be, sizeof be);
+	be = end_htobe32(word3 >> 1); memcpy(ptr + 12, &be, sizeof be);
 }

 static int find_rtl_es1(struct global *wps, char *pin, uint8_t *nonce_buf, uint32_t seed)
--- a/src/random/glibc_random_yura.c
+++ b/src/random/glibc_random_yura.c
@ -26,12 +26,8 @@ static inline uint32_t *glibc_fast_nonce(uint32_t seed, uint32_t *dest)

 		/* This does: seed = (16807LL * seed) % 0x7fffffff
 		   using the sum of digits method which works for mod N, base N+1 */
-		const uint64_t p = 16807ULL * seed;
-		const uint64_t m = (p >> 31) + (p & 0x7fffffff);
-
-		/* The result might still not fit in 31 bits, if not, repeat
-		   (conditional seems to make it slightly faster) */
-		seed = (m & 0xffffffff80000000) ? ((m >> 31) + (m & 0x7fffffff)) : m;
+		const uint64_t p = 16807ULL * seed; /* Seed is always positive (31 bits) */
+		seed = (p >> 31) + (p & 0x7fffffff);
 	}
 	dest[0] = word0 >> 1;
 	dest[1] = word1 >> 1;
@ -44,17 +40,13 @@ static inline uint32_t glibc_fast_seed(uint32_t seed)
 {
 	uint32_t word0 = 0;

-	for (int j = 0; j < 31; j++) {
-		word0 += seed * glibc_seed_tbl[j + 3];
+	for (int j = 3; j < 31 + 3 - 1; j++) {
+		word0 += seed * glibc_seed_tbl[j];

 		/* This does: seed = (16807LL * seed) % 0x7fffffff
 		   using the sum of digits method which works for mod N, base N+1 */
-		const uint64_t p = 16807ULL * seed;
-		const uint64_t m = (p >> 31) + (p & 0x7fffffff);
-
-		/* The result might still not fit in 31 bits, if not, repeat
-		   (conditional seems to make it slightly faster) */
-		seed = (m & 0xffffffff80000000) ? ((m >> 31) + (m & 0x7fffffff)) : m;
+		const uint64_t p = 16807ULL * seed; /* Seed is always positive (31 bits) */
+		seed = (p >> 31) + (p & 0x7fffffff);
 	}
-	return word0 >> 1;
+	return (word0 + seed * glibc_seed_tbl[33]) >> 1;
 }