diff --git a/Cargo.lock b/Cargo.lock index 7ba737bef2..497ed25686 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -390,7 +390,7 @@ dependencies = [ "cpp_demangle", "gimli 0.32.0", "libc 0.2.177", - "memmap2", + "memmap2 0.9.5", "miniz_oxide", "rustc-demangle", ] @@ -1245,6 +1245,7 @@ dependencies = [ "criterion-perf-events", "crossbeam-channel", "datadog-php-profiling", + "dynasmrt", "env_logger 0.11.6", "lazy_static", "libc 0.2.177", @@ -1505,6 +1506,33 @@ version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d6ef0072f8a535281e4876be788938b528e9a1d43900b82c2569af7da799125" +[[package]] +name = "dynasm" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33dc03612f42465a8ed7f5e354bc2b79ba54cedefa81d5bd3a064f1835adaba8" +dependencies = [ + "bitflags 1.3.2", + "byteorder", + "lazy_static", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "dynasmrt" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7dccc31a678058996aef614f6bd418ced384da70f284e83e2b7bf29b27b6a28" +dependencies = [ + "byteorder", + "dynasm", + "fnv", + "memmap2 0.5.10", +] + [[package]] name = "educe" version = "0.4.23" @@ -3041,6 +3069,15 @@ dependencies = [ "rustix", ] +[[package]] +name = "memmap2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "83faa42c0a078c393f6b29d5db232d8be22776a891f8f56e5284faee4a20b327" +dependencies = [ + "libc 0.2.177", +] + [[package]] name = "memmap2" version = "0.9.5" @@ -4640,7 +4677,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "13a4dfe4bbeef59c1f32fc7524ae7c95b9e1de5e79a43ce1604e181081d71b0c" dependencies = [ "debugid", - "memmap2", + "memmap2 0.9.5", "stable_deref_trait", "uuid", ] diff --git a/profiling/Cargo.toml b/profiling/Cargo.toml index 222392889f..2596609896 100644 --- a/profiling/Cargo.toml +++ 
b/profiling/Cargo.toml @@ -22,6 +22,7 @@ cfg-if = { version = "1.0" } cpu-time = { version = "1.0" } chrono = { version = "0.4" } crossbeam-channel = { version = "0.5", default-features = false, features = ["std"] } +dynasmrt = "2.0" libdd-alloc = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } libdd-profiling = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } libdd-common = { git = "https://github.com/DataDog/libdatadog", tag = "v25.0.0" } diff --git a/profiling/build.rs b/profiling/build.rs index b8566e4a5b..d814cb3bea 100644 --- a/profiling/build.rs +++ b/profiling/build.rs @@ -36,6 +36,7 @@ fn main() { let post_startup_cb = cfg_post_startup_cb(vernum); let preload = cfg_preload(vernum); let fibers = cfg_fibers(vernum); + let frameless = cfg_frameless(vernum); let run_time_cache = cfg_run_time_cache(vernum); let trigger_time_sample = cfg_trigger_time_sample(); let zend_error_observer = cfg_zend_error_observer(vernum); @@ -47,6 +48,7 @@ fn main() { preload, run_time_cache, fibers, + frameless, trigger_time_sample, zend_error_observer, ); @@ -102,6 +104,7 @@ fn build_zend_php_ffis( preload: bool, run_time_cache: bool, fibers: bool, + frameless: bool, trigger_time_sample: bool, zend_error_observer: bool, ) { @@ -142,6 +145,7 @@ fn build_zend_php_ffis( let post_startup_cb = if post_startup_cb { "1" } else { "0" }; let preload = if preload { "1" } else { "0" }; let fibers = if fibers { "1" } else { "0" }; + let frameless = if frameless { "1" } else { "0" }; let run_time_cache = if run_time_cache { "1" } else { "0" }; let trigger_time_sample = if trigger_time_sample { "1" } else { "0" }; let zend_error_observer = if zend_error_observer { "1" } else { "0" }; @@ -158,6 +162,7 @@ fn build_zend_php_ffis( .define("CFG_POST_STARTUP_CB", post_startup_cb) .define("CFG_PRELOAD", preload) .define("CFG_FIBERS", fibers) + .define("CFG_FRAMELESS", frameless) .define("CFG_RUN_TIME_CACHE", run_time_cache) .define("CFG_STACK_WALKING_TESTS", 
stack_walking_tests)
         .define("CFG_TRIGGER_TIME_SAMPLE", trigger_time_sample)
@@ -373,6 +378,16 @@ fn cfg_fibers(vernum: u64) -> bool {
     }
 }
 
+/// Declares the `php_frameless` cfg to cargo; enables it (returning `true`) for vernum >= 80400.
+fn cfg_frameless(vernum: u64) -> bool {
+    println!("cargo::rustc-check-cfg=cfg(php_frameless)");
+    let has_frameless = vernum >= 80400;
+    if has_frameless {
+        println!("cargo:rustc-cfg=php_frameless");
+    }
+    has_frameless
+}
+
 fn cfg_php_feature_flags(vernum: u64) {
     println!("cargo::rustc-check-cfg=cfg(php_gc_status, php_zend_compile_string_has_position, php_gc_status_extended, php_frameless, php_opcache_restart_hook, php_zend_mm_set_custom_handlers_ex)");
 
@@ -386,7 +401,6 @@ fn cfg_php_feature_flags(vernum: u64) {
         println!("cargo:rustc-cfg=php_gc_status_extended");
     }
     if vernum >= 80400 {
-        println!("cargo:rustc-cfg=php_frameless");
         println!("cargo:rustc-cfg=php_opcache_restart_hook");
         println!("cargo:rustc-cfg=php_zend_mm_set_custom_handlers_ex");
     }
diff --git a/profiling/src/php_ffi.c b/profiling/src/php_ffi.c
index e249615cbd..451ec022c3 100644
--- a/profiling/src/php_ffi.c
+++ b/profiling/src/php_ffi.c
@@ -109,6 +109,10 @@ bool ddog_php_prof_is_post_startup(void) {
 static post_startup_cb_result (*orig_post_startup_cb)(void) = NULL;
 
 static post_startup_cb_result ddog_php_prof_post_startup_cb(void) {
+#if CFG_FRAMELESS
+    ddog_php_prof_post_startup(); // before preload+JIT (which may hardcode the flf handlers)
+#endif
+
     if (orig_post_startup_cb) {
         post_startup_cb_result (*cb)(void) = orig_post_startup_cb;
 
@@ -117,7 +121,7 @@ static post_startup_cb_result ddog_php_prof_post_startup_cb(void) {
             return FAILURE;
         }
     }
-    
+
     _is_post_startup = true;
 
     return SUCCESS;
diff --git a/profiling/src/php_ffi.h b/profiling/src/php_ffi.h
index d5ee0f9708..99022fa818 100644
--- a/profiling/src/php_ffi.h
+++ b/profiling/src/php_ffi.h
@@ -147,6 +147,10 @@ void ddog_php_prof_zend_mm_set_custom_handlers(zend_mm_heap *heap,
 
 zend_execute_data* ddog_php_prof_get_current_execute_data();
 
+#if CFG_FRAMELESS
+void ddog_php_prof_post_startup();
+#endif
+
 #if CFG_FIBERS
 zend_fiber*
ddog_php_prof_get_active_fiber();
 zend_fiber* ddog_php_prof_get_active_fiber_test();
diff --git a/profiling/src/wall_time.rs b/profiling/src/wall_time.rs
index bfd9390c90..053645e4ef 100644
--- a/profiling/src/wall_time.rs
+++ b/profiling/src/wall_time.rs
@@ -137,6 +137,198 @@ pub extern "C" fn ddog_php_prof_interrupt_function(execute_data: *mut zend_execu
     }
 }
 
+/// Wall-time sampling support for PHP frameless function (FLF) calls.
+///
+/// Each handler in `zend_flf_handlers` is swapped for a generated trampoline
+/// that runs the original handler and then tail-calls
+/// `ddog_php_prof_icall_trampoline_target`, which collects a time sample when
+/// the request has pending interrupts.
+#[cfg(php_frameless)]
+mod frameless {
+    #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+    mod trampoline {
+        #[cfg(target_arch = "aarch64")]
+        use dynasmrt::aarch64::Assembler;
+        #[cfg(target_arch = "aarch64")]
+        use dynasmrt::DynasmLabelApi;
+        #[cfg(target_arch = "x86_64")]
+        use dynasmrt::x64::Assembler;
+        use dynasmrt::{dynasm, DynasmApi, ExecutableBuffer};
+        use std::ffi::c_void;
+        use std::sync::atomic::Ordering;
+        use log::error;
+        use crate::bindings::{zend_flf_functions, zend_flf_handlers, zend_frameless_function_info};
+        use crate::{profiling::Profiler, RefCellExt, REQUEST_LOCALS, zend};
+
+        // These statics keep the copied info lists and the generated code reachable after
+        // `install` returns; a later `install` (on apache reload for example) replaces them.
+        static mut INFOS: Vec<Vec<zend_frameless_function_info>> = Vec::new();
+        static mut BUFFER: Option<ExecutableBuffer> = None;
+
+        /// Replaces every frameless handler with a trampoline that also notifies the profiler.
+        ///
+        /// If the assembler cannot be created or finalized, an error is logged and the
+        /// engine's tables are left untouched.
+        ///
+        /// # Safety
+        ///
+        /// `zend_flf_handlers` must be a NULL-terminated table and `zend_flf_functions` must
+        /// hold a valid function pointer for every handler index. Nothing else may access
+        /// those tables, `INFOS` or `BUFFER` while this function runs.
+        pub unsafe fn install() {
+            // Collect frameless functions ahead of time to batch-process them.
+            // Otherwise we get a new memory page per function.
+            let mut originals = Vec::new();
+            let mut i = 0;
+            loop {
+                let original = *zend_flf_handlers.add(i);
+                if original.is_null() {
+                    break;
+                }
+                originals.push(original);
+                i += 1;
+            }
+
+            let mut assembler = match Assembler::new() {
+                Ok(assembler) => assembler,
+                Err(e) => {
+                    error!("Failed to create assembler for FLF trampolines: {e}. Frameless functions will not appear in wall-time profiles.");
+                    return;
+                }
+            };
+            let interrupt_addr = ddog_php_prof_icall_trampoline_target as *const ();
+            let mut offsets = Vec::new(); // keep function offsets
+            for orig in originals.iter() {
+                offsets.push(assembler.offset());
+                // Calls original function, then calls interrupt function.
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // We need labels on aarch64 as immediates cannot be more than 16 bits
+                    dynasm!(assembler
+                        ; stp x29, x30, [sp, -16]! // save link register and allow clobber of x29
+                        ; mov x29, sp // store stack pointer
+                        ; ldr x16, >orig_label
+                        ; blr x16
+                        ; ldp x29, x30, [sp], 16 // restore link register and x29
+                        ; ldr x16, >interrupt_label
+                        ; br x16 // tail call
+                        ; orig_label: ; .qword *orig as i64
+                    );
+                }
+                #[cfg(target_arch = "x86_64")]
+                dynasm!(assembler
+                    ; push rbp // align stack
+                    ; mov rax, QWORD *orig as i64
+                    ; call rax
+                    ; pop rbp // restore stack
+                    ; mov rax, QWORD interrupt_addr as i64
+                    ; jmp rax // tail call
+                );
+            }
+            #[cfg(target_arch = "aarch64")]
+            dynasm!(assembler
+                ; interrupt_label: ; .qword interrupt_addr as i64 );
+
+            // Private copies of the frameless_function_infos lists. Every list gets its own
+            // allocation, so pointers handed to the engine below stay valid no matter how
+            // many lists are copied afterwards.
+            let mut infos: Vec<Vec<zend_frameless_function_info>> = Vec::new();
+
+            // NOTE(review): dynasmrt documents `finalize` as handing the assembler back in
+            // `Err` while `Executor`s are alive and as panicking on commit errors, so this
+            // arm may not actually correspond to an mprotect denial — confirm.
+            let buffer = match assembler.finalize() {
+                Ok(buffer) => buffer,
+                Err(_) => {
+                    error!("Failed to finalize FLF trampolines (mprotect PROT_EXEC denied?). Frameless functions will not appear in cpu/wall-time profiles. This may be caused by security policies (SELinux, seccomp, etc.).");
+                    return;
+                }
+            };
+            let mut last_infos = std::ptr::null_mut();
+            for (i, offset) in offsets.iter().enumerate() {
+                let wrapper = buffer.as_ptr().add(offset.0) as *mut c_void;
+                *zend_flf_handlers.add(i) = wrapper;
+                let func = &mut **zend_flf_functions.add(i);
+
+                // We need to do copies of frameless_function_infos as they may be readonly memory
+                let original_info = func.internal_function.frameless_function_infos;
+                if original_info != last_infos {
+                    // Copy the list up to and including its NULL-handler terminator.
+                    let mut copy = Vec::new();
+                    let mut ptr = original_info;
+                    loop {
+                        let info = *ptr;
+                        copy.push(info);
+                        if info.handler.is_null() {
+                            break;
+                        }
+                        ptr = ptr.add(1);
+                    }
+                    // `as_mut_ptr` rather than `as_ptr`: entries are rewritten through it below.
+                    last_infos = copy.as_mut_ptr();
+                    infos.push(copy);
+                    func.internal_function.frameless_function_infos = last_infos;
+                }
+                // Redirect the entry that referenced this handler to its trampoline.
+                let mut ptr = last_infos;
+                loop {
+                    let info = &mut *ptr;
+                    if info.handler.is_null() {
+                        break;
+                    }
+                    if info.handler == originals[i] {
+                        info.handler = wrapper;
+                    }
+                    ptr = ptr.add(1);
+                }
+            }
+
+            INFOS = infos;
+            BUFFER = Some(buffer);
+        }
+
+        /// Tail-call target of every generated trampoline.
+        ///
+        /// Swaps the request's interrupt count to zero (the count is treated as zero when
+        /// profiling is disabled or `try_with_borrow` fails); if it was non-zero and a
+        /// profiler exists, collects a time sample for the current `execute_data`.
+        #[no_mangle]
+        #[inline(never)]
+        pub extern "C" fn ddog_php_prof_icall_trampoline_target() {
+            let interrupt_count = REQUEST_LOCALS
+                .try_with_borrow(|locals| {
+                    if !locals.system_settings().profiling_enabled {
+                        return 0;
+                    }
+                    locals.interrupt_count.swap(0, Ordering::SeqCst)
+                })
+                .unwrap_or(0);
+
+            if interrupt_count == 0 {
+                return;
+            }
+
+            if let Some(profiler) = Profiler::get() {
+                // SAFETY: profiler doesn't mutate execute_data
+                let execute_data = unsafe { zend::ddog_php_prof_get_current_execute_data() };
+                profiler.collect_time(execute_data, interrupt_count);
+            }
+        }
+    }
+
+    /// Installs the frameless-function trampolines on x86_64 and aarch64; does nothing
+    /// on other architectures.
+    ///
+    /// # Safety
+    ///
+    /// The caller must uphold the contract of `trampoline::install`.
+    #[no_mangle]
+    pub unsafe extern "C" fn ddog_php_prof_post_startup() {
+        #[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
+        trampoline::install();
+    }
+}
+
 /// A wrapper for the `ddog_php_prof_interrupt_function` to call the
 /// previous interrupt handler, if there was one.
 #[no_mangle]