perf: embed Hmac in cookie_checker to eliminate make_cookie malloc#22
perf: embed Hmac in cookie_checker to eliminate make_cookie malloc#22MarkAtwood wants to merge 1 commit intowolfSSL:masterfrom
Conversation
…e7.14) make_cookie() previously malloc'd an 832-byte Hmac struct on every call. It is only called under rate-limiting conditions (never on the normal data path), so serialisation on the existing secret_lock write semaphore is acceptable. Add a struct Hmac make_cookie_hmac field to struct cookie_checker (embedded in the heap-allocated struct wg_device). Restructure make_cookie() to hold secret_lock for write throughout, covering both the optional secret refresh and the Hmac operation, eliminating the per-call kmalloc/free of the Hmac struct.
acaad38 to
bbd687d
Compare
There was a problem hiding this comment.
Pull request overview
Note
Copilot was unable to run its full agentic suite in this review.
This PR removes the per-call allocation of WolfCrypt’s struct Hmac in make_cookie() by embedding a reusable HMAC scratch buffer into struct cookie_checker and serializing access via secret_lock.
Changes:
- Embed
struct Hmac make_cookie_hmacinsidestruct cookie_checkerto avoid per-call malloc/free. - Restructure
make_cookie()to holdsecret_lockfor write throughout, ensuring exclusive access to the embedded HMAC scratch buffer.
Reviewed changes
Copilot reviewed 2 out of 2 changed files in this pull request and generated 3 comments.
| File | Description |
|---|---|
| kernel-src/cookie.h | Adds an embedded HMAC scratch buffer to struct cookie_checker and includes WolfCrypt glue header. |
| kernel-src/cookie.c | Updates make_cookie() to reuse the embedded HMAC and serialize via write-lock. |
💡 Add Copilot custom instructions for smarter, more guided reviews. Learn how to get started.
| /* Take write lock for the entire operation to ensure exclusive access | ||
| * to checker->make_cookie_hmac. make_cookie is only invoked under | ||
| * rate-limiting conditions, so serialising on the write lock is fine. | ||
| */ | ||
| down_write(&checker->secret_lock); | ||
|
|
||
| if ((ret == 0) && wg_birthdate_has_expired(checker->secret_birthdate, | ||
| if (wg_birthdate_has_expired(checker->secret_birthdate, | ||
| COOKIE_SECRET_MAX_AGE)) { | ||
| down_write(&checker->secret_lock); | ||
| ret = wc_get_random_bytes(checker->secret, NOISE_HASH_LEN); | ||
| if (ret == 0) | ||
| checker->secret_birthdate = ktime_get_coarse_boottime_ns(); | ||
| up_write(&checker->secret_lock); | ||
| } | ||
|
|
||
| if (ret == 0) { | ||
| down_read(&checker->secret_lock); | ||
| if (ret == 0) | ||
| ret = wc_HmacInit(&checker->make_cookie_hmac, NULL /* heap */, INVALID_DEVID); | ||
|
|
||
| ret = wc_HmacSetKey(wc_hmac, WC_SHA256, checker->secret, NOISE_HASH_LEN); | ||
| if (ret == 0) { | ||
| ret = wc_HmacSetKey(&checker->make_cookie_hmac, WC_SHA256, | ||
| checker->secret, NOISE_HASH_LEN); | ||
|
|
||
| if ((ret == 0) && (skb->protocol == htons(ETH_P_IP))) | ||
| ret = wc_HmacUpdate(wc_hmac, (u8 *)&ip_hdr(skb)->saddr, | ||
| ret = wc_HmacUpdate(&checker->make_cookie_hmac, | ||
| (u8 *)&ip_hdr(skb)->saddr, | ||
| (word32)sizeof(struct in_addr)); | ||
| else if ((ret == 0) && (skb->protocol == htons(ETH_P_IPV6))) | ||
| ret = wc_HmacUpdate(wc_hmac, (u8 *)&ipv6_hdr(skb)->saddr, | ||
| ret = wc_HmacUpdate(&checker->make_cookie_hmac, | ||
| (u8 *)&ipv6_hdr(skb)->saddr, | ||
| (word32)sizeof(struct in6_addr)); | ||
|
|
||
| if (ret == 0) | ||
| ret = wc_HmacUpdate(wc_hmac, (u8 *)&udp_hdr(skb)->source, sizeof(__be16)); | ||
| ret = wc_HmacUpdate(&checker->make_cookie_hmac, | ||
| (u8 *)&udp_hdr(skb)->source, | ||
| sizeof(__be16)); | ||
|
|
||
| if (ret == 0) | ||
| ret = wc_HmacFinal(wc_hmac, cookie); | ||
| ret = wc_HmacFinal(&checker->make_cookie_hmac, cookie); | ||
|
|
||
| up_read(&checker->secret_lock); | ||
| wc_HmacFree(&checker->make_cookie_hmac); | ||
| } | ||
|
|
||
| wc_HmacFree(wc_hmac); | ||
|
|
||
| out_hmac_uninited: | ||
|
|
||
| free(wc_hmac); | ||
| up_write(&checker->secret_lock); |
There was a problem hiding this comment.
Holding secret_lock as a write-lock across the full HMAC computation blocks all potential readers of secret_lock for the duration of wc_HmacInit/Update/Final, not just secret refresh. Under sustained rate-limited traffic (e.g., an active attacker), this can amplify contention and potentially delay other threads that only need read access to the cookie secret. A more scalable approach is to keep secret_lock semantics focused on guarding checker->secret (e.g., take down_read, copy checker->secret into a small stack/local buffer like u8 secret[NOISE_HASH_LEN], then up_read), and separately serialize access to make_cookie_hmac with a dedicated mutex/lock; this preserves the “no per-call allocation” goal without turning every cookie generation into a global write-side stop-the-world for the checker.
| /* Scratch buffer for make_cookie() — avoids per-call kmalloc of the | ||
| * 832-byte Hmac struct. Used only while secret_lock is held for write. | ||
| */ | ||
| struct Hmac make_cookie_hmac; |
There was a problem hiding this comment.
Embedding an ~832-byte struct Hmac directly into struct cookie_checker increases the size of what is likely a frequently-touched structure, which can degrade cache locality for unrelated cookie-checking operations. To keep the “allocate once, reuse many” benefit without inflating the hot struct, consider storing struct Hmac *make_cookie_hmac (or a dedicated scratch sub-struct) allocated once at device init/teardown; this avoids per-call allocation while reducing steady-state cache footprint.
| /* Scratch buffer for make_cookie() — avoids per-call kmalloc of the | |
| * 832-byte Hmac struct. Used only while secret_lock is held for write. | |
| */ | |
| struct Hmac make_cookie_hmac; | |
| /* Scratch buffer for make_cookie(); allocate once during init and reuse | |
| * to avoid per-call allocation without inflating this hot structure. | |
| * Used only while secret_lock is held for write. | |
| */ | |
| struct Hmac *make_cookie_hmac; |
| u8 message_mac1_key[NOISE_SYMMETRIC_KEY_LEN]; | ||
| u64 secret_birthdate; | ||
| struct rw_semaphore secret_lock; | ||
| /* Scratch buffer for make_cookie() — avoids per-call kmalloc of the |
There was a problem hiding this comment.
The comment uses a Unicode em dash (—), which is often discouraged in kernel-style codebases that prefer plain ASCII in source for consistency/tooling, and it also says “kmalloc” even though the prior code path used malloc() (or a wrapper). Consider switching to ASCII punctuation (e.g., -- or -) and using allocator-neutral wording (“allocation”) or the exact allocator name used in this module to prevent confusion and style/toolchain issues.
| /* Scratch buffer for make_cookie() — avoids per-call kmalloc of the | |
| /* Scratch buffer for make_cookie() - avoids per-call allocation of the |
Summary
make_cookie()inkernel-src/cookie.cis called whenever a cookie must be generated for a rate-limited peer. Previously it opened withmalloc(sizeof(struct Hmac))and closed withwc_HmacFree+free.sizeof(struct Hmac)is 832 bytes when SHA-3 is enabled.A
struct Hmac make_cookie_hmacscratch buffer is now embedded instruct cookie_checker(which is embedded in the heap-allocatedstruct wg_device).make_cookie()is restructured to holdsecret_lockfor write throughout (it already took write lock for the optional secret refresh). This serialises concurrent calls and gives exclusive access tomake_cookie_hmac, eliminating the per-call kmalloc/free.make_cookieis only called under rate-limiting conditions (never on the normal per-packet path), so write-lock serialisation is acceptable.No stack allocation
sizeof(struct Hmac)≈ 832 bytes — too large for the kernel stack. Themake_cookie_hmacfield lives in the existing heap-allocatedstruct wg_device.Test plan
make_cookiecomputes correct cookie (no regression)