Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions Ports/iOSPort/nativeSources/ExecutableOp.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,12 @@ green:((float)((rgbValue >> 8) & 0xff))/255.0 blue:((float)(rgbValue & 0xff))/25
// Phase 3: render target for this op. nil = screen drawable (default,
// existing GL/Metal screen pipeline). non-nil = a mutable image whose
// backing MTLTexture should receive this op. drawFrame walks the queue
// and switches encoders when target changes between ops.
__unsafe_unretained GLUIImage *target;
// and switches encoders when target changes between ops. Retained by
// setTarget (released in dealloc) -- the main-thread drain runs after
// the EDT enqueued the op, so an unretained target could be deallocated
// in between. Plain ivar = __strong under ARC, manual retain otherwise,
// matching the ops' image ivars (e.g. DrawImage.img).
GLUIImage *target;
#endif
}

Expand Down
14 changes: 14 additions & 0 deletions Ports/iOSPort/nativeSources/ExecutableOp.m
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ -(void)execute {

#ifndef CN1_USE_ARC
// Manual-reference-counting teardown. This method sits inside
// `#ifndef CN1_USE_ARC`, so it is only compiled for non-ARC builds.
-(void)dealloc {
#ifdef CN1_USE_METAL
// Balances the retain that setTarget: takes on the render target in
// non-ARC builds. Messaging nil is a no-op, so an op whose target was
// never set is fine.
[target release];
#endif
[super dealloc];
}
#endif
Expand Down Expand Up @@ -134,6 +137,17 @@ -(GLUIImage*)target {
return target;
}
-(void)setTarget:(GLUIImage*)t {
    // Ownership note: the op is queued by the EDT but its target is only
    // dereferenced later, when drawFrame drains the queue on the main thread.
    // If the op did not own the image, the Java-side GC could finalize it in
    // that window and leave a dangling pointer (seen as unrecognized-selector
    // / SIGSEGV mid-frame). The op therefore keeps a strong reference, the
    // same way the ops' image ivars (e.g. DrawImage.img) do.
#ifndef CN1_USE_ARC
    // Manual retain/release build: take ownership of the incoming image
    // before dropping the outgoing one, and do nothing when the same
    // instance is re-assigned.
    GLUIImage *previous = target;
    if (previous != t) {
        [t retain];
        [previous release];
    }
#endif
    // Under ARC the plain ivar is __strong, so this assignment alone performs
    // the retain/release pair.
    target = t;
}
#endif
Expand Down
45 changes: 45 additions & 0 deletions scripts/run-ios-ui-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,39 @@ APP_PROCESS_NAME="${WRAPPER_NAME%.app}"
echo "App Install : $(( (INSTALL_END - INSTALL_START) * 1000 )) ms" >> "$ARTIFACTS_DIR/ios-test-stats.txt"
echo "App Launch : $(( (LAUNCH_END - LAUNCH_START) * 1000 )) ms" >> "$ARTIFACTS_DIR/ios-test-stats.txt"

# Timestamp marker so crash reports written during this run can be picked
# out of ~/Library/Logs/DiagnosticReports afterwards (find -newer). The
# simulator app is a host process, so its crash reports land on the host.
LAUNCH_MARKER="$ARTIFACTS_DIR/.launch-marker"
touch "$LAUNCH_MARKER"
APP_EXECUTABLE_NAME="$(/usr/libexec/PlistBuddy -c 'Print CFBundleExecutable' "$APP_BUNDLE_PATH/Info.plist" 2>/dev/null || true)"

# When the suite times out the app is usually not idle: ParparVM's
# SignalHandler (CodenameOne_GLAppDelegate.m) converts SIGSEGV into a Java
# NPE and returns, so a thread that faulted outside a Java try frame
# re-executes the faulting instruction forever ("We had a signal 11" spam
# in the device log). The simulator app is a plain host process, so a
# `sample` taken at timeout contains the exact faulting stack (and, for
# genuine deadlocks, every thread's wait state).
capture_hang_diagnostics() {
  # Called when the suite hits its timeout. Best-effort only: every step is
  # allowed to fail without aborting the script.
  #   1. If a live process named $APP_EXECUTABLE_NAME exists, take a 5 second
  #      `sample` of it into $ARTIFACTS_DIR/app-hang-sample.txt.
  #   2. Count "We had a signal" lines in $TEST_LOG and log a hint when the
  #      signal-handler loop signature is present.
  local pid="" spam=""

  # Without an executable name there is nothing to look up; pid stays empty.
  if [ -n "$APP_EXECUTABLE_NAME" ]; then
    pid="$(pgrep -x "$APP_EXECUTABLE_NAME" 2>/dev/null | head -n 1 || true)"
  fi

  if [ -n "$pid" ]; then
    ri_log "Sampling hung app (pid=$pid) -> app-hang-sample.txt"
    sample "$pid" 5 -file "$ARTIFACTS_DIR/app-hang-sample.txt" >/dev/null 2>&1 || true
  else
    ri_log "No live ${APP_EXECUTABLE_NAME:-<unknown>} process found to sample"
  fi

  # `grep -c` prints the count even when it is 0, but exits with status 1 in
  # that case. A `|| echo 0` fallback would therefore append a second "0",
  # leaving "0<newline>0" and making the numeric test below fail with
  # "integer expression expected". Swallow the exit status instead and
  # default an empty result (log missing or unreadable) to 0.
  spam="$(grep -c 'We had a signal' "$TEST_LOG" 2>/dev/null || true)"
  spam="${spam:-0}"
  if [ "$spam" -gt 0 ]; then
    ri_log "Signal-handler loop detected: ${spam} 'We had a signal' lines in device log (a crashed thread is spinning in ParparVM's SignalHandler; see app-hang-sample.txt for the faulting stack)"
  fi
}

END_MARKER="CN1SS:SUITE:FINISHED"
# Per-suite budget (seconds). The 300 -> 600 bump from earlier landed
# back when the suite was ~37 tests; it has since grown to ~90, and the
Expand All @@ -750,6 +783,7 @@ while true; do
NOW="$(date +%s)"
if [ $(( NOW - START_TIME )) -ge $TIMEOUT_SECONDS ]; then
ri_log "STAGE:TIMEOUT -> DeviceRunner did not emit completion marker within ${TIMEOUT_SECONDS}s"
capture_hang_diagnostics
break
fi
sleep 5
Expand All @@ -769,6 +803,17 @@ xcrun simctl spawn "$SIM_DEVICE_ID" \
--predicate '(composedMessage CONTAINS "CN1SS") OR (eventMessage CONTAINS "CN1SS")' \
> "$FALLBACK_LOG" 2>/dev/null || true

# Collect any crash reports the OS wrote for the app during this run
# (simulator app crashes report to the host's DiagnosticReports).
CRASH_REPORT_DIR="$HOME/Library/Logs/DiagnosticReports"
if [ -d "$CRASH_REPORT_DIR" ] && [ -n "$APP_EXECUTABLE_NAME" ]; then
while IFS= read -r crash_file; do
[ -n "$crash_file" ] || continue
ri_log "Collected crash report: $(basename "$crash_file")"
cp -f "$crash_file" "$ARTIFACTS_DIR/" 2>/dev/null || true
done < <(find "$CRASH_REPORT_DIR" -maxdepth 1 -name "${APP_EXECUTABLE_NAME}*" -newer "$LAUNCH_MARKER" 2>/dev/null)
fi

BASE64_STATS_FILE="$ARTIFACTS_DIR/base64-performance-stats.txt"
extract_base64_stats "$BASE64_STATS_FILE" "$TEST_LOG" "$FALLBACK_LOG"
if [ -s "$BASE64_STATS_FILE" ]; then
Expand Down
40 changes: 40 additions & 0 deletions scripts/run-mac-native-ui-tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,10 @@ pkill -x "$APP_PROCESS_NAME" >/dev/null 2>&1 || true
sleep 1

rm_log "Launching Mac Catalyst app via LaunchServices: $APP_BUNDLE_PATH"
# Timestamp marker so crash reports written during this run can be picked
# out of ~/Library/Logs/DiagnosticReports afterwards (find -newer).
LAUNCH_MARKER="$SCREENSHOT_TMP_DIR/.launch-marker"
touch "$LAUNCH_MARKER"
LAUNCH_START=$(date +%s)
# `open -W -n -F` waits for the app to terminate, forces a fresh
# instance, and skips state restoration. `--stdout / --stderr` pipe the
Expand Down Expand Up @@ -354,6 +358,28 @@ else
rm_log "Warning: could not resolve pid for $APP_PROCESS_NAME"
fi

# When the suite times out the app is usually not idle: ParparVM's
# SignalHandler (CodenameOne_GLAppDelegate.m) converts SIGSEGV into a Java
# NPE and returns, so a thread that faulted outside a Java try frame
# re-executes the faulting instruction forever -- that is the "We had a
# signal 11" spam seen in device-runner.log when the suite "hangs". A
# process sample taken at timeout therefore contains the exact faulting
# stack (and, for genuine deadlocks, every thread's wait state).
capture_hang_diagnostics() {
  # Called when the suite hits its timeout. Best-effort only: every step is
  # allowed to fail without aborting the script.
  #   1. If a live process named $APP_PROCESS_NAME exists, take a 5 second
  #      `sample` of it into $ARTIFACTS_DIR/app-hang-sample.txt.
  #   2. Count "We had a signal" lines in $TEST_LOG and log a hint when the
  #      signal-handler loop signature is present.
  local pid="" spam=""

  pid="$(pgrep -x "$APP_PROCESS_NAME" 2>/dev/null | head -n 1 || true)"
  if [ -n "$pid" ]; then
    rm_log "Sampling hung app (pid=$pid) -> app-hang-sample.txt"
    sample "$pid" 5 -file "$ARTIFACTS_DIR/app-hang-sample.txt" >/dev/null 2>&1 || true
  else
    rm_log "No live $APP_PROCESS_NAME process found to sample"
  fi

  # `grep -c` prints the count even when it is 0, but exits with status 1 in
  # that case. A `|| echo 0` fallback would therefore append a second "0",
  # leaving "0<newline>0" and making the numeric test below fail with
  # "integer expression expected". Swallow the exit status instead and
  # default an empty result (log missing or unreadable) to 0.
  spam="$(grep -c 'We had a signal' "$TEST_LOG" 2>/dev/null || true)"
  spam="${spam:-0}"
  if [ "$spam" -gt 0 ]; then
    rm_log "Signal-handler loop detected: ${spam} 'We had a signal' lines in app stdout (a crashed thread is spinning in ParparVM's SignalHandler; see app-hang-sample.txt for the faulting stack)"
  fi
}

END_MARKER="CN1SS:SUITE:FINISHED"
TIMEOUT_SECONDS="${CN1SS_SUITE_TIMEOUT_SECONDS:-1500}"
START_TIME="$(date +%s)"
Expand All @@ -377,6 +403,7 @@ while true; do
NOW="$(date +%s)"
if [ $(( NOW - START_TIME )) -ge $TIMEOUT_SECONDS ]; then
rm_log "STAGE:TIMEOUT -> DeviceRunner did not emit completion marker within ${TIMEOUT_SECONDS}s"
capture_hang_diagnostics
break
fi
sleep 5
Expand Down Expand Up @@ -413,6 +440,19 @@ fi
wait "$APP_PID" 2>/dev/null || true
APP_PID=0

# Collect any crash reports the OS wrote for the app during this run
# (covers the case where the process died outright instead of spinning in
# the signal handler -- LaunchServices apps report to DiagnosticReports,
# not to our stdout pipe).
CRASH_REPORT_DIR="$HOME/Library/Logs/DiagnosticReports"
if [ -d "$CRASH_REPORT_DIR" ]; then
while IFS= read -r crash_file; do
[ -n "$crash_file" ] || continue
rm_log "Collected crash report: $(basename "$crash_file")"
cp -f "$crash_file" "$ARTIFACTS_DIR/" 2>/dev/null || true
done < <(find "$CRASH_REPORT_DIR" -maxdepth 1 -name "${APP_PROCESS_NAME}*" -newer "$LAUNCH_MARKER" 2>/dev/null)
fi

# The app has exited; stop the WebSocket server and adopt whatever it
# received. The server wrote one <test>.png per delivered screenshot into
# $WS_RAW_DIR. When WS delivered at least one image we use that set directly
Expand Down
59 changes: 42 additions & 17 deletions vm/ByteCodeTranslator/src/cn1_globals.m
Original file line number Diff line number Diff line change
Expand Up @@ -546,11 +546,22 @@ void placeObjectInHeapCollection(JAVA_OBJECT obj) {
memset(tmpAllObjectsInHeap + sizeOfAllObjectsInHeap, 0, sizeof(JAVA_OBJECT) * sizeOfAllObjectsInHeap);
memcpy(tmpAllObjectsInHeap, allObjectsInHeap, sizeof(JAVA_OBJECT) * sizeOfAllObjectsInHeap);
sizeOfAllObjectsInHeap *= 2;
// Defer freeing the replaced array by one growth cycle: the sweep and the
// reference-counting removal path read allObjectsInHeap without taking the
// critical section, so an immediate free can pull the array out from under
// an in-flight read. Growths double the capacity so they are rare, and at
// most one stale array is retained.
if(oldAllObjectsInHeap != 0) {
free(oldAllObjectsInHeap);
}
oldAllObjectsInHeap = allObjectsInHeap;
allObjectsInHeap = tmpAllObjectsInHeap;
allObjectsInHeap[currentSizeOfAllObjectsInHeap] = obj;
// record the real slot -- leaving pos at -1 here left the object's
// __heapPosition unset, so a later reference-counted free could not null
// its slot and the sweep would dereference the dangling pointer.
pos = currentSizeOfAllObjectsInHeap;
allObjectsInHeap[pos] = obj;
currentSizeOfAllObjectsInHeap++;
free(oldAllObjectsInHeap);
} else {
allObjectsInHeap[pos] = obj;
}
Expand Down Expand Up @@ -628,23 +639,37 @@ void codenameOneGCMark() {
}
}

// place allocations from the local thread into the global heap list
if (!t->lightweightThread) {
// For native threads, we need to actually lock them while we traverse the
// heap allocations because we can't use the usual locking mechanisms on
// them.
lockThreadHeapMutex();
}
for(int heapTrav = 0 ; heapTrav < t->heapAllocationSize ; heapTrav++) {
JAVA_OBJECT obj = (JAVA_OBJECT)t->pendingHeapAllocations[heapTrav];
if(obj) {
t->pendingHeapAllocations[heapTrav] = 0;
placeObjectInHeapCollection(obj);
// place allocations from the local thread into the global heap list.
// The critical section serializes this migration against
// markDeadThread/collectThreadResources: the pause-wait above ends when
// threadActive drops, but a thread that finishes runImpl drops
// threadActive through markDeadThread, so without the lock both sides
// migrate the same pendingHeapAllocations concurrently -- double-placing
// objects and racing placeObjectInHeapCollection's grow-and-free of
// allObjectsInHeap (double free / use-after-free, observed as random
// SIGSEGV or a libmalloc abort that wedges the VM). If the slot no
// longer holds this thread it died and markDeadThread already migrated
// everything under this same lock; skip.
lockCriticalSection();
if(allThreads[iter] == t) {
if (!t->lightweightThread) {
// For native threads, we need to actually lock them while we traverse the
// heap allocations because we can't use the usual locking mechanisms on
// them.
lockThreadHeapMutex();
}
for(int heapTrav = 0 ; heapTrav < t->heapAllocationSize ; heapTrav++) {
JAVA_OBJECT obj = (JAVA_OBJECT)t->pendingHeapAllocations[heapTrav];
if(obj) {
t->pendingHeapAllocations[heapTrav] = 0;
placeObjectInHeapCollection(obj);
}
}
if (!t->lightweightThread) {
unlockThreadHeapMutex();
}
}
if (!t->lightweightThread) {
unlockThreadHeapMutex();
}
unlockCriticalSection();

// this is a thread that allocates a lot and might demolish RAM. We will hold it until the sweep is finished...

Expand Down
Loading