From 7e8f57b8d57193b0d78261a22cb544860d89c987 Mon Sep 17 00:00:00 2001 From: atarpara Date: Sat, 16 May 2026 20:19:58 +0530 Subject: [PATCH 1/5] =?UTF-8?q?=E2=9A=A1=EF=B8=8F=20Optimize=20cbrt?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/utils/clz/FixedPointMathLib.sol | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index 247915c33..c8784884a 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -807,14 +807,13 @@ library FixedPointMathLib { /// @solidity memory-safe-assembly assembly { // Initial guess z ≈ c · 2^q where b = ⌊log₂(x)⌋, q = ⌊b / 3⌋. The - // 8-bit fixed-point multipliers `c`: 144/128, 181/128, and 229/128 + // 8-bit fixed-point multipliers `c`: 140/128, 172/128, and 236/128 // are selected by `b mod 3` to balance each octave's worst-case // final error. This gives >98 bits of precision after only 5 // Newton-Raphson iterations. The `or(..., 1)` keeps z ≥ 1 when the // shifted estimate is 0. let b := sub(255, clz(x)) - z := or(shr(7, shl(div(b, 3), byte(add(mod(b, 3), 29), 0x90b5e5))), 1) - + z := or(shr(7, shl(div(b, 3), add(108, shl(mod(b, 3), 32)))), 1) // 5 Newton-Raphson iterations z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) From 4e0e6eb95cc76dda2a16243be57e2851818bc9ba Mon Sep 17 00:00:00 2001 From: atarpara Date: Sat, 16 May 2026 20:21:51 +0530 Subject: [PATCH 2/5] Typo --- src/utils/clz/FixedPointMathLib.sol | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index c8784884a..a308c1d7a 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -809,7 +809,7 @@ library FixedPointMathLib { // Initial guess z ≈ c · 2^q where b = ⌊log₂(x)⌋, q = ⌊b / 3⌋. The // 8-bit fixed-point multipliers `c`: 140/128, 172/128, and 236/128 // are selected by `b mod 3` to balance each octave's worst-case - // final error. This gives >98 bits of precision after only 5 + // final error. This gives >89 bits of precision after only 5 // Newton-Raphson iterations. The `or(..., 1)` keeps z ≥ 1 when the // shifted estimate is 0. let b := sub(255, clz(x)) From c8353b1b582d54921b1c2454a5e2538027a2a19b Mon Sep 17 00:00:00 2001 From: atarpara Date: Sun, 17 May 2026 09:31:25 +0530 Subject: [PATCH 3/5] Fixed constant --- src/utils/clz/FixedPointMathLib.sol | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index a308c1d7a..949a6d846 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -809,11 +809,11 @@ library FixedPointMathLib { // Initial guess z ≈ c · 2^q where b = ⌊log₂(x)⌋, q = ⌊b / 3⌋. The // 8-bit fixed-point multipliers `c`: 140/128, 172/128, and 236/128 // are selected by `b mod 3` to balance each octave's worst-case - // final error. This gives >89 bits of precision after only 5 + // final error. This gives >88 bits of precision after only 5 // Newton-Raphson iterations. The `or(..., 1)` keeps z ≥ 1 when the // shifted estimate is 0. let b := sub(255, clz(x)) - z := or(shr(7, shl(div(b, 3), add(108, shl(mod(b, 3), 32)))), 1) + z := or(shr(7, shl(div(b, 3), add(121, shl(mod(b, 3), 28)))), 1) // 5 Newton-Raphson iterations z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) From 9f046e0241b33104111859b447e7b650bffe47dc Mon Sep 17 00:00:00 2001 From: atarpara Date: Sun, 17 May 2026 12:31:08 +0530 Subject: [PATCH 4/5] Optimize more --- src/utils/clz/FixedPointMathLib.sol | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index 949a6d846..358ee88a3 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -806,14 +806,15 @@ library FixedPointMathLib { function cbrt(uint256 x) internal pure returns (uint256 z) { /// @solidity memory-safe-assembly assembly { - // Initial guess z ≈ c · 2^q where b = ⌊log₂(x)⌋, q = ⌊b / 3⌋. The - // 8-bit fixed-point multipliers `c`: 140/128, 172/128, and 236/128 + // Initial guess z ≈ c · 2^q where b = ⌊log₂(x) + 2⌋, q = ⌊b / 3⌋. The + // 8-bit fixed-point multipliers `c`: 89/128, 115/128, and 141/128 // are selected by `b mod 3` to balance each octave's worst-case - // final error. This gives >88 bits of precision after only 5 - // Newton-Raphson iterations. The `or(..., 1)` keeps z ≥ 1 when the - // shifted estimate is 0. - let b := sub(255, clz(x)) - z := or(shr(7, shl(div(b, 3), add(121, shl(mod(b, 3), 28)))), 1) + // final error. This gives >97 bits of precision after only 5 + // Newton-Raphson iterations. + + let b := sub(257, clz(x)) + z := shr(7, shl(div(b, 3), add(89, mul(26, mod(b, 3))))) + // 5 Newton-Raphson iterations z := div(add(add(div(x, mul(z, z)), z), z), 3) z := div(add(add(div(x, mul(z, z)), z), z), 3) From 3d4000b95d2eade62bfd1dac23bc5ed7a448ed5e Mon Sep 17 00:00:00 2001 From: atarpara Date: Tue, 19 May 2026 10:10:19 +0530 Subject: [PATCH 5/5] New constant --- src/utils/clz/FixedPointMathLib.sol | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/utils/clz/FixedPointMathLib.sol b/src/utils/clz/FixedPointMathLib.sol index 358ee88a3..5b19001c8 100644 --- a/src/utils/clz/FixedPointMathLib.sol +++ b/src/utils/clz/FixedPointMathLib.sol @@ -807,13 +807,12 @@ library FixedPointMathLib { /// @solidity memory-safe-assembly assembly { // Initial guess z ≈ c · 2^q where b = ⌊log₂(x) + 2⌋, q = ⌊b / 3⌋. The - // 8-bit fixed-point multipliers `c`: 89/128, 115/128, and 141/128 + // 8-bit fixed-point multipliers `c`: 90/128, 116/128, and 142/128 // are selected by `b mod 3` to balance each octave's worst-case - // final error. This gives >97 bits of precision after only 5 + // final error. This gives >94 bits of precision after only 5 // Newton-Raphson iterations. - - let b := sub(257, clz(x)) - z := shr(7, shl(div(b, 3), add(89, mul(26, mod(b, 3))))) + z := sub(257, clz(x)) + z := shr(7, shl(div(z, 3), add(90, mul(26, mod(z, 3))))) // 5 Newton-Raphson iterations z := div(add(add(div(x, mul(z, z)), z), z), 3)