From 0eb96ee845c35a732a1f8e1644029043b20f8585 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 17:01:51 +0100 Subject: [PATCH 01/14] Test flf trampoline Signed-off-by: Bob Weinand --- Cargo.lock | 41 ++++++++++++++++++- profiling/Cargo.toml | 1 + profiling/build.rs | 18 ++++++++- profiling/src/php_ffi.c | 4 ++ profiling/src/php_ffi.h | 4 ++ profiling/src/wall_time.rs | 82 ++++++++++++++++++++++++++++++++++++++ 6 files changed, 147 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e8273bf6046..07821f75264 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,7 @@ dependencies = [ "cpp_demangle", "gimli 0.32.0", "libc 0.2.177", - "memmap2", + "memmap2 0.9.5", "miniz_oxide", "rustc-demangle", ] @@ -1245,6 +1245,7 @@ dependencies = [ "criterion-perf-events", "crossbeam-channel", "datadog-php-profiling", + "dynasmrt", "env_logger 0.11.6", "lazy_static", "libc 0.2.177", @@ -1506,6 +1507,33 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +[[package]] +name = "dynasm" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33dc03612f42465a8ed7f5e354bc2b79ba54cedefa81d5bd3a064f1835adaba8" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "lazy_static", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dynasmrt" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7dccc31a678058996aef614f6bd418ced384da70f284e83e2b7bf29b27b6a28" +dependencies = [ + "byteorder", + "dynasm", + "fnv", + "memmap2 0.5.10", +] + [[package]] name = "educe" version = "0.4.23" @@ -3042,6 +3070,15 @@ dependencies = [ "rustix", ] +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc 0.2.177", +] + [[package]] name = "memmap2" version = "0.9.5" @@ -4650,7 +4687,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13a4dfe4bbeef59c1f32fc7524ae7c95b9e1de5e79a43ce1604e181081d71b0c" dependencies = [ "debugid", - "memmap2", + "memmap2 0.9.5", "stable_deref_trait", "uuid", ] diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index a2d182de4ba..4968a33e81b 100644 --- a/profiling/Cargo.toml +++ b/profiling/Cargo.toml @@ -22,6 +22,7 @@ cfg-if = { version = "1.0" } cpu-time = { version = "1.0" } chrono = { version = "0.4" } crossbeam-channel = { version = "0.5", default-features = false, features = ["std"] } +dynasmrt = "2.0" libdd-alloc = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } libdd-profiling = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } libdd-common = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } diff --git a/profiling/build.rs b/profiling/build.rs index 9876a13cec2..77eea6bd521 100644 --- a/profiling/build.rs +++ b/profiling/build.rs @@ -37,6 +37,7 @@ fn main() { let post_startup_cb = cfg_post_startup_cb(vernum); let preload = cfg_preload(vernum); let fibers = cfg_fibers(vernum); + let frameless = cfg_frameless(vernum); let run_time_cache = cfg_run_time_cache(vernum); let trigger_time_sample = cfg_trigger_time_sample(); let zend_error_observer = cfg_zend_error_observer(vernum); @@ -48,6 +49,7 @@ fn main() { preload, run_time_cache, fibers, + frameless, trigger_time_sample, zend_error_observer, ); @@ -103,6 +105,7 @@ fn build_zend_php_ffis( preload: bool, run_time_cache: bool, fibers: bool, + frameless: bool, trigger_time_sample: bool, zend_error_observer: bool, ) { @@ -143,6 +146,7 @@ fn build_zend_php_ffis( let post_startup_cb = if post_startup_cb { "1" } else { "0" }; let preload = if preload { "1" } else { "0" }; let fibers = if fibers { "1" } else { 
"0" }; + let frameless = if frameless { "1" } else { "0" }; let run_time_cache = if run_time_cache { "1" } else { "0" }; let trigger_time_sample = if trigger_time_sample { "1" } else { "0" }; let zend_error_observer = if zend_error_observer { "1" } else { "0" }; @@ -159,6 +163,7 @@ fn build_zend_php_ffis( .define("CFG_POST_STARTUP_CB", post_startup_cb) .define("CFG_PRELOAD", preload) .define("CFG_FIBERS", fibers) + .define("CFG_FRAMELESS", frameless) .define("CFG_RUN_TIME_CACHE", run_time_cache) .define("CFG_STACK_WALKING_TESTS", stack_walking_tests) .define("CFG_TRIGGER_TIME_SAMPLE", trigger_time_sample) @@ -394,6 +399,18 @@ fn cfg_fibers(vernum: u64) -> bool { } } +fn cfg_frameless(vernum: u64) -> bool { + if has_check_cfg() { + println!("cargo::rustc-check-cfg=cfg(php_frameless)"); + } + if vernum >= 80400 { + println!("cargo:rustc-cfg=php_frameless"); + true + } else { + false + } +} + fn cfg_php_feature_flags(vernum: u64) { if has_check_cfg() { println!("cargo::rustc-check-cfg=cfg(php_gc_status, php_zend_compile_string_has_position, php_gc_status_extended, php_frameless, php_opcache_restart_hook, php_zend_mm_set_custom_handlers_ex)"); @@ -409,7 +426,6 @@ fn cfg_php_feature_flags(vernum: u64) { println!("cargo:rustc-cfg=php_gc_status_extended"); } if vernum >= 80400 { - println!("cargo:rustc-cfg=php_frameless"); println!("cargo:rustc-cfg=php_opcache_restart_hook"); println!("cargo:rustc-cfg=php_zend_mm_set_custom_handlers_ex"); } diff --git a/profiling/src/php_ffi.c b/profiling/src/php_ffi.c index 698d8775080..685a22c37e8 100644 --- a/profiling/src/php_ffi.c +++ b/profiling/src/php_ffi.c @@ -117,6 +117,10 @@ static post_startup_cb_result ddog_php_prof_post_startup_cb(void) { return FAILURE; } } + +#if CFG_FRAMELESS + ddog_php_prof_post_startup(); +#endif _is_post_startup = true; diff --git a/profiling/src/php_ffi.h b/profiling/src/php_ffi.h index d5ee0f97081..99022fa8189 100644 --- a/profiling/src/php_ffi.h +++ b/profiling/src/php_ffi.h @@ -147,6 +147,10 @@ 
void ddog_php_prof_zend_mm_set_custom_handlers(zend_mm_heap *heap, zend_execute_data* ddog_php_prof_get_current_execute_data(); +#if CFG_FRAMELESS +void ddog_php_prof_post_startup(); +#endif + #if CFG_FIBERS zend_fiber* ddog_php_prof_get_active_fiber(); zend_fiber* ddog_php_prof_get_active_fiber_test(); diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index bfd9390c90d..42c379dd266 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -137,6 +137,88 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu } } +#[cfg(php_frameless)] +mod frameless { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + use crate::bindings::{zend_flf_functions, zend_flf_handlers}; + + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + mod trampoline { + use crate::wall_time::ddog_php_prof_interrupt_function; + #[cfg(target_arch = "aarch64")] + use dynasmrt::aarch64::Assembler; + #[cfg(target_arch = "x86_64")] + use dynasmrt::x64::Assembler; + use dynasmrt::{dynasm, DynasmApi}; + use std::ffi::c_void; + + pub unsafe fn generate_wrapper(original: *mut c_void) -> *mut c_void { + let mut assembler = Assembler::new().unwrap(); + let interrupt_addr = ddog_php_prof_interrupt_function as *const (); + #[cfg(target_arch = "aarch64")] + dynasm!(assembler + ; mov x16, original as u64 + ; blr x16 + ; mov x0, 0 + ; mov x16, interrupt_addr as u64 + ; blr x16 + ; ret + ); + #[cfg(target_arch = "x86_64")] + dynasm!(assembler + ; mov rax, QWORD original as i64 + ; call rax + ; mov rdi, 0 + ; mov rax, QWORD interrupt_addr as i64 + ; call rax + ; ret + ); + let buffer = assembler.finalize().unwrap(); + let ptr = buffer.as_ptr() as *mut c_void; + std::mem::forget(buffer); // TODO: leaks memory + ptr + } + } + + #[no_mangle] + pub unsafe extern "C" fn ddog_php_prof_post_startup() { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + { + let mut i = 0; + loop { + let original = 
*zend_flf_handlers.add(i); + if original.is_null() { + break; + } + let wrapper = trampoline::generate_wrapper(original); + *zend_flf_handlers.add(i) = wrapper; + let func = &mut **zend_flf_functions.add(i); + let original_info = (*func).internal_function.frameless_function_infos; + let mut infos = Vec::new(); + let mut ptr = original_info; + loop { + let info = *ptr; + infos.push(info); + if info.handler.is_null() { + break; + } + ptr = ptr.add(1); + } + for info in infos.iter_mut() { + if info.handler == original { + info.handler = wrapper; + } + } + infos.push(crate::bindings::zend_frameless_function_info { handler: std::ptr::null_mut(), num_args: 0 }); + let new_infos = infos.into_boxed_slice(); + (*func).internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; + std::mem::forget(new_infos); // TODO: leaks memory + i += 1; + } + } + } +} + /// A wrapper for the `ddog_php_prof_interrupt_function` to call the /// previous interrupt handler, if there was one. #[no_mangle] From c6bdefc15fb6d10d6e7645a42d6ca7cf94da228d Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 18:59:08 +0100 Subject: [PATCH 02/14] Use EG(current_execute_data) Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 42c379dd266..28ef52687f8 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -144,22 +144,22 @@ mod frameless { #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] mod trampoline { - use crate::wall_time::ddog_php_prof_interrupt_function; #[cfg(target_arch = "aarch64")] use dynasmrt::aarch64::Assembler; #[cfg(target_arch = "x86_64")] use dynasmrt::x64::Assembler; use dynasmrt::{dynasm, DynasmApi}; use std::ffi::c_void; + use super::super::ddog_php_prof_interrupt_function; + use crate::zend; pub unsafe fn generate_wrapper(original: *mut c_void) -> *mut c_void { let mut 
assembler = Assembler::new().unwrap(); - let interrupt_addr = ddog_php_prof_interrupt_function as *const (); + let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const (); #[cfg(target_arch = "aarch64")] dynasm!(assembler ; mov x16, original as u64 ; blr x16 - ; mov x0, 0 ; mov x16, interrupt_addr as u64 ; blr x16 ; ret @@ -168,7 +168,6 @@ mod frameless { dynasm!(assembler ; mov rax, QWORD original as i64 ; call rax - ; mov rdi, 0 ; mov rax, QWORD interrupt_addr as i64 ; call rax ; ret @@ -178,6 +177,12 @@ mod frameless { std::mem::forget(buffer); // TODO: leaks memory ptr } + + #[no_mangle] + #[inline(never)] + pub unsafe extern "C" fn ddog_php_prof_icall_trampoline_target() { + ddog_php_prof_interrupt_function(zend::ddog_php_prof_get_current_execute_data()); + } } #[no_mangle] @@ -193,7 +198,7 @@ mod frameless { let wrapper = trampoline::generate_wrapper(original); *zend_flf_handlers.add(i) = wrapper; let func = &mut **zend_flf_functions.add(i); - let original_info = (*func).internal_function.frameless_function_infos; + let original_info = func.internal_function.frameless_function_infos; let mut infos = Vec::new(); let mut ptr = original_info; loop { @@ -211,7 +216,7 @@ mod frameless { } infos.push(crate::bindings::zend_frameless_function_info { handler: std::ptr::null_mut(), num_args: 0 }); let new_infos = infos.into_boxed_slice(); - (*func).internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; + func.internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; std::mem::forget(new_infos); // TODO: leaks memory i += 1; } From 4358ab8f9298ec932e47ac4939404dc8a11dbe55 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 19:16:43 +0100 Subject: [PATCH 03/14] Remove redundant ret Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 28ef52687f8..7fc6eacd45b 100644 
--- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -154,6 +154,7 @@ mod frameless { use crate::zend; pub unsafe fn generate_wrapper(original: *mut c_void) -> *mut c_void { + // Calls original function, then calls interrupt function. let mut assembler = Assembler::new().unwrap(); let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const (); #[cfg(target_arch = "aarch64")] @@ -161,16 +162,14 @@ mod frameless { ; mov x16, original as u64 ; blr x16 ; mov x16, interrupt_addr as u64 - ; blr x16 - ; ret + ; br x16 // tail call ); #[cfg(target_arch = "x86_64")] dynasm!(assembler ; mov rax, QWORD original as i64 ; call rax ; mov rax, QWORD interrupt_addr as i64 - ; call rax - ; ret + ; jmp rax // tail call ); let buffer = assembler.finalize().unwrap(); let ptr = buffer.as_ptr() as *mut c_void; @@ -181,6 +180,7 @@ mod frameless { #[no_mangle] #[inline(never)] pub unsafe extern "C" fn ddog_php_prof_icall_trampoline_target() { + // TODO: First check for REQUEST_LOCALS.interrupt_count before fetching execute data to make this less expensive ddog_php_prof_interrupt_function(zend::ddog_php_prof_get_current_execute_data()); } } @@ -198,6 +198,8 @@ mod frameless { let wrapper = trampoline::generate_wrapper(original); *zend_flf_handlers.add(i) = wrapper; let func = &mut **zend_flf_functions.add(i); + + // We need to do copies of frameless_function_infos as they may be readonly memory let original_info = func.internal_function.frameless_function_infos; let mut infos = Vec::new(); let mut ptr = original_info; @@ -214,7 +216,6 @@ mod frameless { info.handler = wrapper; } } - infos.push(crate::bindings::zend_frameless_function_info { handler: std::ptr::null_mut(), num_args: 0 }); let new_infos = infos.into_boxed_slice(); func.internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; std::mem::forget(new_infos); // TODO: leaks memory From df41452adccbfb73bec70b14447ea921f0f16642 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 
2026 19:53:08 +0100 Subject: [PATCH 04/14] Batch allocate all trampolines Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 89 +++++++++++++++++++++----------------- 1 file changed, 49 insertions(+), 40 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 7fc6eacd45b..f7a5c3f5954 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -140,7 +140,6 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu #[cfg(php_frameless)] mod frameless { #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - use crate::bindings::{zend_flf_functions, zend_flf_handlers}; #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] mod trampoline { @@ -151,51 +150,48 @@ mod frameless { use dynasmrt::{dynasm, DynasmApi}; use std::ffi::c_void; use super::super::ddog_php_prof_interrupt_function; + use crate::bindings::{zend_flf_functions, zend_flf_handlers}; use crate::zend; - pub unsafe fn generate_wrapper(original: *mut c_void) -> *mut c_void { - // Calls original function, then calls interrupt function. 
- let mut assembler = Assembler::new().unwrap(); - let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const (); - #[cfg(target_arch = "aarch64")] - dynasm!(assembler - ; mov x16, original as u64 - ; blr x16 - ; mov x16, interrupt_addr as u64 - ; br x16 // tail call - ); - #[cfg(target_arch = "x86_64")] - dynasm!(assembler - ; mov rax, QWORD original as i64 - ; call rax - ; mov rax, QWORD interrupt_addr as i64 - ; jmp rax // tail call - ); - let buffer = assembler.finalize().unwrap(); - let ptr = buffer.as_ptr() as *mut c_void; - std::mem::forget(buffer); // TODO: leaks memory - ptr - } - - #[no_mangle] - #[inline(never)] - pub unsafe extern "C" fn ddog_php_prof_icall_trampoline_target() { - // TODO: First check for REQUEST_LOCALS.interrupt_count before fetching execute data to make this less expensive - ddog_php_prof_interrupt_function(zend::ddog_php_prof_get_current_execute_data()); - } - } - - #[no_mangle] - pub unsafe extern "C" fn ddog_php_prof_post_startup() { - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - { + pub unsafe fn install() { + // Collect frameless functions ahead of time to batch-process them. + // Otherwise we get a new memory page per function. + let mut originals = Vec::new(); let mut i = 0; loop { let original = *zend_flf_handlers.add(i); if original.is_null() { break; } - let wrapper = trampoline::generate_wrapper(original); + originals.push(original); + i += 1; + } + + let mut assembler = Assembler::new().unwrap(); + let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const (); + let mut offsets = Vec::new(); // keep function offsets + for orig in originals.iter() { + offsets.push(assembler.offset()); + // Calls original function, then calls interrupt function. 
+ #[cfg(target_arch = "aarch64")] + dynasm!(assembler + ; mov x16, *orig as u64 + ; blr x16 + ; mov x16, interrupt_addr as u64 + ; br x16 // tail call + ); + #[cfg(target_arch = "x86_64")] + dynasm!(assembler + ; mov rax, QWORD *orig as i64 + ; call rax + ; mov rax, QWORD interrupt_addr as i64 + ; jmp rax // tail call + ); + } + + let buffer = assembler.finalize().unwrap(); + for (i, offset) in offsets.iter().enumerate() { + let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void; *zend_flf_handlers.add(i) = wrapper; let func = &mut **zend_flf_functions.add(i); @@ -212,16 +208,29 @@ mod frameless { ptr = ptr.add(1); } for info in infos.iter_mut() { - if info.handler == original { + if info.handler == originals[i] { info.handler = wrapper; } } let new_infos = infos.into_boxed_slice(); func.internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; std::mem::forget(new_infos); // TODO: leaks memory - i += 1; } + std::mem::forget(buffer); // TODO: leaks memory } + + #[no_mangle] + #[inline(never)] + pub unsafe extern "C" fn ddog_php_prof_icall_trampoline_target() { + // TODO: First check for REQUEST_LOCALS.interrupt_count before fetching execute data to make this less expensive + ddog_php_prof_interrupt_function(zend::ddog_php_prof_get_current_execute_data()); + } + } + + #[no_mangle] + pub unsafe extern "C" fn ddog_php_prof_post_startup() { + #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] + trampoline::install(); } } From 60c7da931206967eb94ef69e579e3f8f875d75dc Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 20:21:30 +0100 Subject: [PATCH 05/14] Batch allocate infos as well for easier cleanup Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index f7a5c3f5954..12aef50bfc6 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -189,6 +189,9 @@ mod frameless 
{ ); } + // Allocate enough space for all frameless_function_infos including trailing NULLs + let mut infos = Vec::with_capacity(originals.len() * 2); + let buffer = assembler.finalize().unwrap(); for (i, offset) in offsets.iter().enumerate() { let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void; @@ -196,8 +199,8 @@ mod frameless { let func = &mut **zend_flf_functions.add(i); // We need to do copies of frameless_function_infos as they may be readonly memory + let info_size = infos.len(); let original_info = func.internal_function.frameless_function_infos; - let mut infos = Vec::new(); let mut ptr = original_info; loop { let info = *ptr; @@ -212,10 +215,9 @@ mod frameless { info.handler = wrapper; } } - let new_infos = infos.into_boxed_slice(); - func.internal_function.frameless_function_infos = new_infos.as_ptr() as *mut _; - std::mem::forget(new_infos); // TODO: leaks memory + func.internal_function.frameless_function_infos = infos.as_ptr().add(info_size) as *mut _; } + std::mem::forget(infos); // TODO: leaks memory std::mem::forget(buffer); // TODO: leaks memory } From 6307860ae3e615f029b8401a4b69d2f72467801a Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 20:34:05 +0100 Subject: [PATCH 06/14] Store aarch64 link register; and fix stack align on x86_64 Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 12aef50bfc6..b11aca35d5e 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -139,8 +139,6 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu #[cfg(php_frameless)] mod frameless { - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] - #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))] mod trampoline { #[cfg(target_arch = "aarch64")] @@ -175,15 +173,20 @@ mod frameless { // Calls original function, then calls interrupt 
function. #[cfg(target_arch = "aarch64")] dynasm!(assembler + ; stp x29, x30, [sp, -16]! // save link register and allow clobber of x29 + ; mov x29, sp // store stack pointer ; mov x16, *orig as u64 ; blr x16 + ; ldp x29, x30, [sp], 16 // restore link register and x29 ; mov x16, interrupt_addr as u64 ; br x16 // tail call ); #[cfg(target_arch = "x86_64")] dynasm!(assembler + ; push rbp // align stack ; mov rax, QWORD *orig as i64 ; call rax + ; pop rbp // restore stack ; mov rax, QWORD interrupt_addr as i64 ; jmp rax // tail call ); From bcebfb04d11adca26857b2aa4cffa381c6110da6 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 21:37:35 +0100 Subject: [PATCH 07/14] Avoid updating infos multiple times Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index b11aca35d5e..a5387cf3443 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -196,29 +196,35 @@ mod frameless { let mut infos = Vec::with_capacity(originals.len() * 2); let buffer = assembler.finalize().unwrap(); + let mut last_infos = std::ptr::null_mut(); for (i, offset) in offsets.iter().enumerate() { let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void; *zend_flf_handlers.add(i) = wrapper; let func = &mut **zend_flf_functions.add(i); // We need to do copies of frameless_function_infos as they may be readonly memory - let info_size = infos.len(); let original_info = func.internal_function.frameless_function_infos; + if original_info == last_infos { + continue; + } + + let info_size = infos.len(); let mut ptr = original_info; loop { + let idx = infos.len(); let info = *ptr; infos.push(info); if info.handler.is_null() { break; } - ptr = ptr.add(1); - } - for info in infos.iter_mut() { + let info = &mut infos[idx]; if info.handler == originals[i] { info.handler = wrapper; } + ptr = ptr.add(1); } - 
func.internal_function.frameless_function_infos = infos.as_ptr().add(info_size) as *mut _; + last_infos = infos.as_ptr().add(info_size) as *mut _; + func.internal_function.frameless_function_infos = last_infos; } std::mem::forget(infos); // TODO: leaks memory std::mem::forget(buffer); // TODO: leaks memory From a40771221c66eca3b53a964a64ebb298fad50f90 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Wed, 21 Jan 2026 22:24:08 +0100 Subject: [PATCH 08/14] Fix aarch64 asm with immediates Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index a5387cf3443..56456f8fcdd 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -143,6 +143,8 @@ mod frameless { mod trampoline { #[cfg(target_arch = "aarch64")] use dynasmrt::aarch64::Assembler; + #[cfg(target_arch = "aarch64")] + use dynasmrt::DynasmLabelApi; #[cfg(target_arch = "x86_64")] use dynasmrt::x64::Assembler; use dynasmrt::{dynasm, DynasmApi}; @@ -172,15 +174,19 @@ mod frameless { offsets.push(assembler.offset()); // Calls original function, then calls interrupt function. #[cfg(target_arch = "aarch64")] - dynasm!(assembler - ; stp x29, x30, [sp, -16]! // save link register and allow clobber of x29 - ; mov x29, sp // store stack pointer - ; mov x16, *orig as u64 - ; blr x16 - ; ldp x29, x30, [sp], 16 // restore link register and x29 - ; mov x16, interrupt_addr as u64 - ; br x16 // tail call - ); + { + // We need labels on aarch64 as immediates cannot be more than 16 bits + dynasm!(assembler + ; stp x29, x30, [sp, -16]! 
// save link register and allow clobber of x29 + ; mov x29, sp // store stack pointer + ; ldr x16, >label + ; blr x16 + ; ldp x29, x30, [sp], 16 // restore link register and x29 + ; ldr x16, >interrupt_label + ; br x16 // tail call + ; label: ; .qword *orig as i64 + ); + } #[cfg(target_arch = "x86_64")] dynasm!(assembler ; push rbp // align stack @@ -191,6 +197,9 @@ mod frameless { ; jmp rax // tail call ); } + #[cfg(target_arch = "aarch64")] + dynasm!(assembler + ; interrupt_label: ; .qword interrupt_addr as i64 ); // Allocate enough space for all frameless_function_infos including trailing NULLs let mut infos = Vec::with_capacity(originals.len() * 2); From b9fc37000ffea246ff0a3a0b0ed097ec4ec5b931 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Thu, 22 Jan 2026 14:14:41 +0100 Subject: [PATCH 09/14] Fix flf functions with multiple handlers Signed-off-by: Bob Weinand --- profiling/src/wall_time.rs | 30 +++++++++++++++++------------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 56456f8fcdd..48c8a51bc44 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -179,12 +179,12 @@ mod frameless { dynasm!(assembler ; stp x29, x30, [sp, -16]! 
// save link register and allow clobber of x29 ; mov x29, sp // store stack pointer - ; ldr x16, >label + ; ldr x16, >orig_label ; blr x16 ; ldp x29, x30, [sp], 16 // restore link register and x29 ; ldr x16, >interrupt_label ; br x16 // tail call - ; label: ; .qword *orig as i64 + ; orig_label: ; .qword *orig as i64 ); } #[cfg(target_arch = "x86_64")] @@ -213,27 +213,31 @@ mod frameless { // We need to do copies of frameless_function_infos as they may be readonly memory let original_info = func.internal_function.frameless_function_infos; - if original_info == last_infos { - continue; + if original_info != last_infos { + let info_size = infos.len(); + let mut ptr = original_info; + loop { + let info = *ptr; + infos.push(info); + if info.handler.is_null() { + break; + } + ptr = ptr.add(1); + } + last_infos = infos.as_ptr().add(info_size) as *mut _; + func.internal_function.frameless_function_infos = last_infos; } - - let info_size = infos.len(); - let mut ptr = original_info; + let mut ptr = last_infos; loop { - let idx = infos.len(); - let info = *ptr; - infos.push(info); + let info = &mut *ptr; if info.handler.is_null() { break; } - let info = &mut infos[idx]; if info.handler == originals[i] { info.handler = wrapper; } ptr = ptr.add(1); } - last_infos = infos.as_ptr().add(info_size) as *mut _; - func.internal_function.frameless_function_infos = last_infos; } std::mem::forget(infos); // TODO: leaks memory std::mem::forget(buffer); // TODO: leaks memory From d9d3f43c18f0ab725ca6bf03c4ad324281d00da8 Mon Sep 17 00:00:00 2001 From: Bob Weinand Date: Thu, 22 Jan 2026 14:42:31 +0100 Subject: [PATCH 10/14] Resolve TODOs Signed-off-by: Bob Weinand --- profiling/src/php_ffi.c | 8 +++---- profiling/src/wall_time.rs | 43 ++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 13 deletions(-) diff --git a/profiling/src/php_ffi.c b/profiling/src/php_ffi.c index 685a22c37e8..84e29ad741c 100644 --- a/profiling/src/php_ffi.c +++ b/profiling/src/php_ffi.c @@ -109,6 
+109,10 @@ bool ddog_php_prof_is_post_startup(void) { static post_startup_cb_result (*orig_post_startup_cb)(void) = NULL; static post_startup_cb_result ddog_php_prof_post_startup_cb(void) { +#if CFG_FRAMELESS + ddog_php_prof_post_startup(); // before preload+JIT (which may hardcode the flf handlers) +#endif + if (orig_post_startup_cb) { post_startup_cb_result (*cb)(void) = orig_post_startup_cb; @@ -118,10 +122,6 @@ static post_startup_cb_result ddog_php_prof_post_startup_cb(void) { } } -#if CFG_FRAMELESS - ddog_php_prof_post_startup(); -#endif - _is_post_startup = true; return SUCCESS; diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 48c8a51bc44..6fa542b429a 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -147,11 +147,16 @@ mod frameless { use dynasmrt::DynasmLabelApi; #[cfg(target_arch = "x86_64")] use dynasmrt::x64::Assembler; - use dynasmrt::{dynasm, DynasmApi}; + use dynasmrt::{dynasm, DynasmApi, ExecutableBuffer}; use std::ffi::c_void; - use super::super::ddog_php_prof_interrupt_function; - use crate::bindings::{zend_flf_functions, zend_flf_handlers}; - use crate::zend; + use std::sync::atomic::Ordering; + use log::debug; + use crate::bindings::{zend_flf_functions, zend_flf_handlers, zend_frameless_function_info}; + use crate::{profiling::Profiler, RefCellExt, REQUEST_LOCALS, zend}; + + // This ensures that the memory stays reachable and is replaced on apache reload for example + static mut INFOS: Vec<zend_frameless_function_info> = Vec::new(); + static mut BUFFER: Option<ExecutableBuffer> = None; pub unsafe fn install() { // Collect frameless functions ahead of time to batch-process them. 
@@ -239,15 +244,35 @@ mod frameless { ptr = ptr.add(1); } } - std::mem::forget(infos); // TODO: leaks memory - std::mem::forget(buffer); // TODO: leaks memory + + INFOS = infos; + BUFFER = Some(buffer); } #[no_mangle] #[inline(never)] - pub unsafe extern "C" fn ddog_php_prof_icall_trampoline_target() { - // TODO: First check for REQUEST_LOCALS.interrupt_count before fetching execute data to make this less expensive - ddog_php_prof_interrupt_function(zend::ddog_php_prof_get_current_execute_data()); + pub extern "C" fn ddog_php_prof_icall_trampoline_target() { + let result = REQUEST_LOCALS.try_with_borrow(|locals| { + if !locals.system_settings().profiling_enabled { + return; + } + + // Check whether we are actually wanting an interrupt to be handled. + let interrupt_count = locals.interrupt_count.swap(0, Ordering::SeqCst); + if interrupt_count == 0 { + return; + } + + if let Some(profiler) = Profiler::get() { + // SAFETY: profiler doesn't mutate execute_data + let execute_data = unsafe { zend::ddog_php_prof_get_current_execute_data() }; + profiler.collect_time(execute_data, interrupt_count); + } + }); + + if let Err(err) = result { + debug!("ddog_php_prof_icall_trampoline_target failed to borrow request locals: {err}"); + } } } From 2f81b3a636f4b5900975d0dbd9fa3c210d55923f Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Fri, 23 Jan 2026 11:41:16 +0100 Subject: [PATCH 11/14] log error instead of unwrap() --- profiling/src/wall_time.rs | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 6fa542b429a..6aa7d95a347 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -150,7 +150,7 @@ mod frameless { use dynasmrt::{dynasm, DynasmApi, ExecutableBuffer}; use std::ffi::c_void; use std::sync::atomic::Ordering; - use log::debug; + use log::{debug, error}; use crate::bindings::{zend_flf_functions, zend_flf_handlers, zend_frameless_function_info}; use 
crate::{profiling::Profiler, RefCellExt, REQUEST_LOCALS, zend}; @@ -172,7 +172,13 @@ mod frameless { i += 1; } - let mut assembler = Assembler::new().unwrap(); + let mut assembler = match Assembler::new() { + Ok(assembler) => assembler, + Err(e) => { + error!("Failed to create assembler for FLF trampolines: {e}. Frameless functions will not appear in wall-time profiles."); + return; + } + }; let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const (); let mut offsets = Vec::new(); // keep function offsets for orig in originals.iter() { @@ -209,7 +215,13 @@ mod frameless { // Allocate enough space for all frameless_function_infos including trailing NULLs let mut infos = Vec::with_capacity(originals.len() * 2); - let buffer = assembler.finalize().unwrap(); + let buffer = match assembler.finalize() { + Ok(buffer) => buffer, + Err(_) => { + error!("Failed to finalize FLF trampolines (mprotect PROT_EXEC denied?). Frameless functions will not appear in cpu/wall-time profiles. This may be caused by security policies (SELinux, seccomp, etc.)."); + return; + } + }; let mut last_infos = std::ptr::null_mut(); for (i, offset) in offsets.iter().enumerate() { let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void; From 74392db45e1dc3c01a3f66b49cbed6c5ccde1367 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Wed, 28 Jan 2026 14:57:26 +0100 Subject: [PATCH 12/14] release borrow as soon as possible --- profiling/src/wall_time.rs | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index 6aa7d95a347..cf3d2230069 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -264,26 +264,23 @@ mod frameless { #[no_mangle] #[inline(never)] pub extern "C" fn ddog_php_prof_icall_trampoline_target() { - let result = REQUEST_LOCALS.try_with_borrow(|locals| { - if !locals.system_settings().profiling_enabled { - return; - } - - // Check whether we are 
actually wanting an interrupt to be handled. - let interrupt_count = locals.interrupt_count.swap(0, Ordering::SeqCst); - if interrupt_count == 0 { - return; - } + let interrupt_count = REQUEST_LOCALS + .try_with_borrow(|locals| { + if !locals.system_settings().profiling_enabled { + return 0; + } + locals.interrupt_count.swap(0, Ordering::SeqCst) + }) + .unwrap_or(0); - if let Some(profiler) = Profiler::get() { - // SAFETY: profiler doesn't mutate execute_data - let execute_data = unsafe { zend::ddog_php_prof_get_current_execute_data() }; - profiler.collect_time(execute_data, interrupt_count); - } - }); + if interrupt_count == 0 { + return; + } - if let Err(err) = result { - debug!("ddog_php_prof_icall_trampoline_target failed to borrow request locals: {err}"); + if let Some(profiler) = Profiler::get() { + // SAFETY: profiler doesn't mutate execute_data + let execute_data = unsafe { zend::ddog_php_prof_get_current_execute_data() }; + profiler.collect_time(execute_data, interrupt_count); } } } From c615b0100612090394667bded7f02431c34ce194 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Wed, 28 Jan 2026 15:04:57 +0100 Subject: [PATCH 13/14] make clippy happy --- profiling/src/wall_time.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs index cf3d2230069..053645e4ef3 100644 --- a/profiling/src/wall_time.rs +++ b/profiling/src/wall_time.rs @@ -150,7 +150,7 @@ mod frameless { use dynasmrt::{dynasm, DynasmApi, ExecutableBuffer}; use std::ffi::c_void; use std::sync::atomic::Ordering; - use log::{debug, error}; + use log::error; use crate::bindings::{zend_flf_functions, zend_flf_handlers, zend_frameless_function_info}; use crate::{profiling::Profiler, RefCellExt, REQUEST_LOCALS, zend}; From e11b3fc2f451b3b2611c58183cc0956cf4ee9282 Mon Sep 17 00:00:00 2001 From: Florian Engelhardt Date: Wed, 28 Jan 2026 15:08:49 +0100 Subject: [PATCH 14/14] fix after merge master --- profiling/build.rs | 4 +--- 
1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/profiling/build.rs b/profiling/build.rs index 2435e0f22b6..d814cb3beab 100644 --- a/profiling/build.rs +++ b/profiling/build.rs @@ -379,9 +379,7 @@ fn cfg_fibers(vernum: u64) -> bool { } fn cfg_frameless(vernum: u64) -> bool { - if has_check_cfg() { - println!("cargo::rustc-check-cfg=cfg(php_frameless)"); - } + println!("cargo::rustc-check-cfg=cfg(php_frameless)"); if vernum >= 80400 { println!("cargo:rustc-cfg=php_frameless"); true