Skip to content

Commit a700b4d

Browse files
feat(prof): improve time sample accuracy by gathering during allocation samples (#3559)
* perf(profiling): piggyback time samples onto allocation samples When allocation profiler takes a sample and interrupt_count > 0, collect both allocation and time samples in a single stack walk. Benefits: - Eliminates redundant stack walks when samples coincide. - Improves time sample accuracy (closer to timer expiry). * refactor orderings of collect_* and extract helpers * refactor(test): allocation + CPU combined tests * refactor: make jq for combined samples less brittle * refactor: revert Ordering optimizations Let's do this in a dedicated PR. * style: use SAFETY instead of Safety * test: hammer piggybacking * refactor: simplify test and hook it up properly * test: fix prof-correctness * ci(profiling): run allocation_time_combined on PHP 8.4+ * test(profiling): fix allocation_time_combined float->int * ci(profiling): update expected data * ci(profiling): run allocation_time_combined with ALLOCATION_SAMPLING_DISTANCE=1 * ci(profiling): ignore main for wall-time for allocation_time_combined * EXPERIMENTAL: check ZEND_OP_DATA too * DEBUG: do we hit frameless icalls? * DEBUG: more info in panic * refactor: simplify Profiler::get_timeline_timestamp * Revert "DEBUG: more info in panic" This reverts commit a3e3e4e. * Revert "DEBUG: do we hit frameless icalls?" This reverts commit e16e1b1. * undo experimental debugging * test: fix expected log message * test: use more replaces to exaggerate effect * EXPERIMENT: show leaf frame * do not fail fast, finish prof correctness * upload artifacts * Disable Xdebug for prof-correctness * Revert "upload artifacts" This reverts commit 44e2734. * test: allow any non-zero wall-time for piggy-backed samples * remove first_frame for performance * Remove misleading strlen and comment Co-authored-by: Florian Engelhardt <florian.engelhardt@datadoghq.com>
1 parent 16eaf8b commit a700b4d

File tree

7 files changed

+145
-25
lines changed

7 files changed

+145
-25
lines changed

.github/workflows/prof_correctness.yml

Lines changed: 29 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ jobs:
99
prof-correctness:
1010
runs-on: ubuntu-24.04
1111
strategy:
12+
fail-fast: false
1213
matrix:
13-
php-version: [8.0, 8.1, 8.2, 8.3, 8.4, 8.5]
14+
php-version: ['8.0', '8.1', '8.2', '8.3', '8.4', '8.5']
1415
phpts: [nts, zts]
1516
include:
1617
- phpts: zts
@@ -109,6 +110,26 @@ jobs:
109110
USE_ZEND_ALLOC=0 php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocations.php
110111
unset DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE
111112
113+
- name: Run allocation_time_combined test (PHP 8.4+)
114+
if: fromJSON(matrix.php-version) >= 8.4
115+
run: |
116+
export DD_PROFILING_LOG_LEVEL=trace
117+
export DD_PROFILING_EXPERIMENTAL_FEATURES_ENABLED=1
118+
export DD_PROFILING_EXPERIMENTAL_EXCEPTION_SAMPLING_DISTANCE=1
119+
export DD_PROFILING_EXCEPTION_MESSAGE_ENABLED=1
120+
php -v
121+
php -d extension=target/profiler-release/libdatadog_php_profiling.so --ri datadog-profiling
122+
mkdir -p profiling/tests/correctness/allocation_time_combined/
123+
export DD_PROFILING_OUTPUT_PPROF=$PWD/profiling/tests/correctness/allocation_time_combined/test.pprof
124+
export DD_PROFILING_ALLOCATION_ENABLED=yes
125+
export DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE=1
126+
export DD_PROFILING_WALL_TIME_ENABLED=yes
127+
export DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED=no
128+
export DD_PROFILING_ENDPOINT_COLLECTION_ENABLED=no
129+
export EXECUTION_TIME=5
130+
php -d extension=$PWD/target/profiler-release/libdatadog_php_profiling.so profiling/tests/correctness/allocation_time_combined.php
131+
unset DD_PROFILING_ALLOCATION_ENABLED DD_PROFILING_ALLOCATION_SAMPLING_DISTANCE DD_PROFILING_WALL_TIME_ENABLED DD_PROFILING_EXPERIMENTAL_CPU_TIME_ENABLED DD_PROFILING_ENDPOINT_COLLECTION_ENABLED EXECUTION_TIME
132+
112133
- name: Run ZTS tests
113134
if: matrix.phpts == 'zts'
114135
run: |
@@ -160,6 +181,13 @@ jobs:
160181
expected_json: profiling/tests/correctness/timeline.json
161182
pprof_path: profiling/tests/correctness/timeline/
162183

184+
- name: Check profiler correctness for allocation_time_combined
185+
if: fromJSON(matrix.php-version) >= 8.4
186+
uses: Datadog/prof-correctness/analyze@main
187+
with:
188+
expected_json: profiling/tests/correctness/allocation_time_combined.json
189+
pprof_path: profiling/tests/correctness/allocation_time_combined/
190+
163191
- name: Check profiler correctness for exceptions ZTS
164192
if: matrix.phpts == 'zts'
165193
uses: Datadog/prof-correctness/analyze@main

profiling/src/allocation/mod.rs

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -92,12 +92,21 @@ impl AllocationProfilingStats {
9292
#[cold]
9393
pub fn collect_allocation(len: size_t) {
9494
if let Some(profiler) = Profiler::get() {
95-
// Safety: execute_data was provided by the engine, and the profiler doesn't mutate it.
95+
// Check if there's a pending time interrupt that we can handle now
96+
// instead of waiting for an interrupt handler. This is slightly more
97+
// accurate and efficient, win-win.
98+
let interrupt_count = REQUEST_LOCALS
99+
.try_with_borrow(|locals| locals.interrupt_count.swap(0, Ordering::SeqCst))
100+
.unwrap_or(0);
101+
102+
// SAFETY: execute_data was provided by the engine, and the profiler
103+
// doesn't mutate it.
96104
unsafe {
97105
profiler.collect_allocations(
98106
zend::ddog_php_prof_get_current_execute_data(),
99107
1_i64,
100108
len as i64,
109+
(interrupt_count > 0).then_some(interrupt_count),
101110
)
102111
};
103112
}

profiling/src/profiling/mod.rs

Lines changed: 34 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -917,14 +917,7 @@ impl Profiler {
917917
let labels = Profiler::common_labels(0);
918918
let n_labels = labels.len();
919919

920-
let mut timestamp = NO_TIMESTAMP;
921-
{
922-
if self.is_timeline_enabled() {
923-
if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) {
924-
timestamp = now.as_nanos() as i64;
925-
}
926-
}
927-
}
920+
let timestamp = self.get_timeline_timestamp();
928921

929922
match self.prepare_and_send_message(
930923
frames,
@@ -951,33 +944,51 @@ impl Profiler {
951944
}
952945
}
953946

954-
/// Collect a stack sample with memory allocations.
947+
/// Collect a stack sample with memory allocations, and optionally time data.
948+
///
949+
/// When `interrupt_count` is provided, this piggybacks time sampling onto
950+
/// allocation sampling to avoid redundant stack walks.
955951
#[cfg_attr(feature = "tracing", tracing::instrument(skip_all))]
956952
pub fn collect_allocations(
957953
&self,
958954
execute_data: *mut zend_execute_data,
959955
alloc_samples: i64,
960956
alloc_size: i64,
957+
interrupt_count: Option<u32>,
961958
) {
962959
let result = collect_stack_sample(execute_data);
963960
match result {
964961
Ok(frames) => {
965962
let depth = frames.len();
963+
964+
// Optionally collect time data when interrupt_count is provided
965+
let (interrupt_count, wall_time, cpu_time, timestamp) =
966+
if let Some(count) = interrupt_count {
967+
let (wall_time, cpu_time) = CLOCKS.with_borrow_mut(Clocks::rotate_clocks);
968+
let timestamp = self.get_timeline_timestamp();
969+
(count as i64, wall_time, cpu_time, timestamp)
970+
} else {
971+
(0, 0, 0, NO_TIMESTAMP)
972+
};
973+
966974
let labels = Profiler::common_labels(0);
967975
let n_labels = labels.len();
968976

969977
match self.prepare_and_send_message(
970978
frames,
971979
SampleValues {
972-
alloc_size,
980+
interrupt_count,
981+
wall_time,
982+
cpu_time,
973983
alloc_samples,
984+
alloc_size,
974985
..Default::default()
975986
},
976987
labels,
977-
NO_TIMESTAMP,
988+
timestamp,
978989
) {
979990
Ok(_) => trace!(
980-
"Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler."
991+
"Sent stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, {alloc_samples} allocations, and {interrupt_count} time interrupts to profiler."
981992
),
982993
Err(err) => warn!(
983994
"Failed to send stack sample of {depth} frames, {n_labels} labels, {alloc_size} bytes allocated, and {alloc_samples} allocations to profiler: {err}"
@@ -1018,14 +1029,7 @@ impl Profiler {
10181029

10191030
let n_labels = labels.len();
10201031

1021-
let mut timestamp = NO_TIMESTAMP;
1022-
{
1023-
if self.is_timeline_enabled() {
1024-
if let Ok(now) = SystemTime::now().duration_since(UNIX_EPOCH) {
1025-
timestamp = now.as_nanos() as i64;
1026-
}
1027-
}
1028-
}
1032+
let timestamp = self.get_timeline_timestamp();
10291033

10301034
match self.prepare_and_send_message(
10311035
frames,
@@ -1436,6 +1440,16 @@ impl Profiler {
14361440
}
14371441
}
14381442

1443+
/// Gets a timestamp for timeline profiling if timeline is enabled.
1444+
/// Returns NO_TIMESTAMP if timeline is disabled or if getting the time fails.
1445+
fn get_timeline_timestamp(&self) -> i64 {
1446+
self.is_timeline_enabled()
1447+
.then(|| SystemTime::now().duration_since(UNIX_EPOCH).ok())
1448+
.flatten()
1449+
.map(|now| now.as_nanos() as i64)
1450+
.unwrap_or(NO_TIMESTAMP)
1451+
}
1452+
14391453
/// Creates the common message labels for all samples.
14401454
///
14411455
/// * `n_extra_labels` - Reserve room for extra labels, such as when the

profiling/src/profiling/uploader.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,11 @@ impl Uploader {
162162
}
163163
}
164164

165-
#[cfg(test)]
165+
#[cfg(all(test, feature = "debug_stats"))]
166166
mod tests {
167167
use super::*;
168168

169169
#[test]
170-
#[cfg(feature = "debug_stats")]
171170
fn test_create_internal_metadata() {
172171
// Set up all counters with known values
173172
EXCEPTION_PROFILING_EXCEPTION_COUNT.store(42, Ordering::Relaxed);
Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
{
2+
"scale_by_duration": true,
3+
"test_name": "php_allocation_time_combined",
4+
"stacks": [
5+
{
6+
"profile-type": "alloc-size",
7+
"stack-content": [
8+
{
9+
"regular_expression": "<?php;main;standard\\|str_replace$",
10+
"percent": 66,
11+
"error_margin": 1
12+
},
13+
{
14+
"regular_expression": "<?php;main;standard\\|str_repeat$",
15+
"percent": 33,
16+
"error_margin": 1
17+
}
18+
]
19+
},
20+
{
21+
"profile-type": "alloc-samples",
22+
"stack-content": [
23+
{
24+
"regular_expression": "<?php;main;standard\\|str_replace$",
25+
"percent": 66,
26+
"error_margin": 1
27+
},
28+
{
29+
"regular_expression": "<?php;main;standard\\|str_repeat$",
30+
"percent": 33,
31+
"error_margin": 1
32+
}
33+
]
34+
},
35+
{
36+
"profile-type": "wall-time",
37+
"stack-content": [
38+
{
39+
"regular_expression": "<?php;main;standard\\|str_replace$",
40+
"percent": 51,
41+
"error_margin": 50
42+
}
43+
]
44+
}
45+
]
46+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
<?php
2+
3+
// Test to verify that allocation and time profiling can coexist and that
4+
// the piggybacking optimization works (where time samples are taken during
5+
// allocation samples when interrupt_count > 0).
6+
//
7+
// This test uses str_replace, a frameless function (PHP 8.4+) which allocates
8+
// large amounts of memory, increasing the likelihood of capturing piggybacked
9+
// samples. Frameless functions don't handle VM interrupts, making them ideal
10+
// for demonstrating the piggybacking optimization.
11+
12+
function main() {
13+
$duration = $_ENV["EXECUTION_TIME"] ?? 10;
14+
$end = microtime(true) + $duration;
15+
16+
while (microtime(true) < $end) {
17+
// str_replace is frameless in PHP 8.4+ and allocates a new string
18+
$xs = str_repeat("x", 10_000_000); // 10MB source
19+
$ys = str_replace("x", "y", $xs); // 10MB allocation in frameless function
20+
$zs = str_replace("y", "z", $ys); // 10MB allocation in frameless function
21+
}
22+
}
23+
24+
main();

profiling/tests/phpt/allocation_sampling_distance.phpt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,5 +44,5 @@ $str = \str_repeat('a', 87);
4444
Done.
4545
--EXPECTREGEX--
4646
.* Memory allocation profiling initialized with a sampling distance of 1 bytes.*
47-
.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, and 1 allocations to profiler.*
47+
.* Sent stack sample of [0-9]* frames, .* labels, 11[2,9] bytes allocated, 1 allocations, and .* time interrupts to profiler.*
4848
.*Done\..*

0 commit comments

Comments
 (0)