From 7d7bf7e78f358121714d131175f1f93fefb3a05e Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Sun, 12 Apr 2026 22:22:27 +0200 Subject: [PATCH 1/4] fix: rare crash due to memoty race condition MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The vectors PYJLVALUES and PYJLFREEVALUES are modified without any protection against GC-triggered re-entrancy. _pyjl_dealloc(o1) or PyJuliaValue_SetValue └→ push!(PYJLFREEVALUES, idx) # begins resize (sets internal flag) └→ vector must grow → allocates → triggers Julia GC └→ GC collects a Py object → runs py_finalizer └→ enqueue(ptr) → PyGILState_Check()==1 (same thread holds GIL) └→ Py_DecRef(ptr) → refcount hits 0 └→ _pyjl_dealloc(o2) └→ push!(PYJLFREEVALUES, idx2) ← BOOM: flag already set ConcurrencyViolationError! 1. push! needs to grow the vector (exceeds capacity), so it allocates 2. The allocation triggers Julia GC, which runs finalizers 3. A finalizer calls Py_DecRef (because enqueue sees the GIL is held on this same thread), dropping refcount to 0, which triggers _pyjl_dealloc re-entrantly on the same vector This is especially likely during shutdown (at_jl_exit → jl_atexit_hook), because jl_gc_run_all_finalizers runs finalizers on the calling thread (the main thread, which holds the GIL). Many Py objects are finalized at once, repeatedly pushing to PYJLFREEVALUES, and each push that triggers a reallocation can cause the re-entrant chain. Disabled Julia's GC during the critical vector modifications, so that push!/pop! cannot trigger allocation-based GC, which prevents the finalizer chain from re-entering. Added a SpinLock as defense-in-depth against true multi-thread races. --- src/JlWrap/C.jl | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 19ff2904..dc1d84c2 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -25,6 +25,8 @@ const PyJuliaBase_Type = Ref(C.PyNULL) const PYJLVALUES = [] # unused indices in PYJLVALUES const PYJLFREEVALUES = Int[] +# lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification +const PYJL_LOCK = Threads.SpinLock() function _pyjl_new(t::C.PyPtr, ::C.PyPtr, ::C.PyPtr) alloc = C.PyType_GetSlot(t, C.Py_tp_alloc) @@ -39,8 +41,16 @@ end function _pyjl_dealloc(o::C.PyPtr) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx != 0 + # Disable GC to prevent push! from triggering a GC that runs finalizers + # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → + # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → + # ConcurrencyViolationError). The lock guards against true multi-thread races. + prev = Base.GC.enable(false) + lock(PYJL_LOCK) PYJLVALUES[idx] = nothing push!(PYJLFREEVALUES, idx) + unlock(PYJL_LOCK) + Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) freeptr = C.PyType_GetSlot(C.Py_Type(o), C.Py_tp_free) @@ -375,6 +385,10 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin o = C.asptr(_o) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] if idx == 0 + # Disable GC to prevent push!/pop! from triggering a GC whose finalizers + # re-entrantly resize the same vectors (see _pyjl_dealloc comment). + prev = Base.GC.enable(false) + lock(PYJL_LOCK) if isempty(PYJLFREEVALUES) push!(PYJLVALUES, v) idx = length(PYJLVALUES) @@ -382,6 +396,8 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin idx = pop!(PYJLFREEVALUES) PYJLVALUES[idx] = v end + unlock(PYJL_LOCK) + Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else PYJLVALUES[idx] = v From 104103c41aa8bbab1cc0503e0113225c53c80c03 Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Thu, 16 Apr 2026 15:40:13 +0200 Subject: [PATCH 2/4] fix: crash in atexit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After jl_atexit_hook(0) returns, Python continues destroying objects, calling _pyjl_dealloc. But Julia's GC subsystem is already torn down, so Base.GC.enable() and lock() crash. Now we add a flag that's set by a Julia atexit hook (runs before finalizers inside jl_atexit_hook). When the flag is set, _pyjl_dealloc skips all Julia runtime operations — the process is exiting anyway. --- src/JlWrap/C.jl | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index dc1d84c2..458c9f59 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -27,6 +27,9 @@ const PYJLVALUES = [] const PYJLFREEVALUES = Int[] # lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification const PYJL_LOCK = Threads.SpinLock() +# set to true by an atexit hook so _pyjl_dealloc can skip Julia runtime calls +# (Base.GC.enable, lock) after jl_atexit_hook has torn down the runtime +const JL_EXITING = Ref(false) function _pyjl_new(t::C.PyPtr, ::C.PyPtr, ::C.PyPtr) alloc = C.PyType_GetSlot(t, C.Py_tp_alloc) @@ -40,7 +43,7 @@ end function _pyjl_dealloc(o::C.PyPtr) idx = C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] - if idx != 0 + if idx != 0 && !JL_EXITING[] # Disable GC to prevent push! from triggering a GC that runs finalizers # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → @@ -375,6 +378,9 @@ end function __init__() init_c() + atexit() do + JL_EXITING[] = true + end end PyJuliaValue_IsNull(o) = Base.GC.@preserve o (C.@ft UnsafePtr{PyJuliaValueObject}(C.asptr(o)).value[]) == 0 From 6e149eba864ebe2a48a29c9f35c0136c0246fc1d Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Sun, 19 Apr 2026 11:26:56 +0200 Subject: [PATCH 3/4] no PYJL_LOCK --- src/JlWrap/C.jl | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 458c9f59..51bcb596 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -26,7 +26,7 @@ const PYJLVALUES = [] # unused indices in PYJLVALUES const PYJLFREEVALUES = Int[] # lock protecting PYJLVALUES and PYJLFREEVALUES from concurrent modification -const PYJL_LOCK = Threads.SpinLock() +#const PYJL_LOCK = Threads.SpinLock() # set to true by an atexit hook so _pyjl_dealloc can skip Julia runtime calls # (Base.GC.enable, lock) after jl_atexit_hook has torn down the runtime const JL_EXITING = Ref(false) @@ -49,10 +49,10 @@ function _pyjl_dealloc(o::C.PyPtr) # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → # ConcurrencyViolationError). The lock guards against true multi-thread races. prev = Base.GC.enable(false) - lock(PYJL_LOCK) + # lock(PYJL_LOCK) PYJLVALUES[idx] = nothing push!(PYJLFREEVALUES, idx) - unlock(PYJL_LOCK) + # unlock(PYJL_LOCK) Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) @@ -394,7 +394,7 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin # Disable GC to prevent push!/pop! from triggering a GC whose finalizers # re-entrantly resize the same vectors (see _pyjl_dealloc comment). prev = Base.GC.enable(false) - lock(PYJL_LOCK) + #lock(PYJL_LOCK) if isempty(PYJLFREEVALUES) push!(PYJLVALUES, v) idx = length(PYJLVALUES) @@ -402,7 +402,7 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin idx = pop!(PYJLFREEVALUES) PYJLVALUES[idx] = v end - unlock(PYJL_LOCK) + #unlock(PYJL_LOCK) Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else From cbc7ef11162647d27bc08797223fbca4e8671b00 Mon Sep 17 00:00:00 2001 From: Dani Pinyol Date: Sun, 19 Apr 2026 11:33:30 +0200 Subject: [PATCH 4/4] no GC disable --- src/JlWrap/C.jl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/JlWrap/C.jl b/src/JlWrap/C.jl index 51bcb596..0866b6b3 100644 --- a/src/JlWrap/C.jl +++ b/src/JlWrap/C.jl @@ -48,12 +48,12 @@ function _pyjl_dealloc(o::C.PyPtr) # re-entrantly (the finalizer chain: push! → alloc → GC → py_finalizer → # enqueue → Py_DecRef → _pyjl_dealloc → push! on same vector → # ConcurrencyViolationError). The lock guards against true multi-thread races. - prev = Base.GC.enable(false) +# prev = Base.GC.enable(false) # lock(PYJL_LOCK) PYJLVALUES[idx] = nothing push!(PYJLFREEVALUES, idx) # unlock(PYJL_LOCK) - Base.GC.enable(prev) + # Base.GC.enable(prev) end (C.@ft UnsafePtr{PyJuliaValueObject}(o).weaklist[!]) == C.PyNULL || C.PyObject_ClearWeakRefs(o) freeptr = C.PyType_GetSlot(C.Py_Type(o), C.Py_tp_free) @@ -393,7 +393,7 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin if idx == 0 # Disable GC to prevent push!/pop! from triggering a GC whose finalizers # re-entrantly resize the same vectors (see _pyjl_dealloc comment). - prev = Base.GC.enable(false) +# prev = Base.GC.enable(false) #lock(PYJL_LOCK) if isempty(PYJLFREEVALUES) push!(PYJLVALUES, v) @@ -403,7 +403,7 @@ PyJuliaValue_SetValue(_o, @nospecialize(v)) = Base.GC.@preserve _o begin PYJLVALUES[idx] = v end #unlock(PYJL_LOCK) - Base.GC.enable(prev) +# Base.GC.enable(prev) C.@ft UnsafePtr{PyJuliaValueObject}(o).value[] = idx else PYJLVALUES[idx] = v