Pull thunder PR "Remove the --profile option"

tbqh · tbqh · commit b21af254bada · 2025-11-21T15:09:01.000-08:00
Lightning-AI/lightning-thunder#2715
diff --git a/benchmarks/python/benchmark_inference.py b/benchmarks/python/benchmark_inference.py
@@ -224,7 +224,6 @@ class InferenceBenchmarkConfig:
     mode: str
     disable_moe_replacement: bool
     attn_implementation: str | None
-    profile: bool
     thunder_cache: str | None
     enable_thunder_cudagraph: bool
 
@@ -557,10 +556,17 @@ def run_benchmark(self) -> InferenceMetrics:
         for _ in tqdm(range(self.config.num_iterations), disable=LOCAL_RANK != 0):
             past_key_values.reset()
 
-            if self.config.profile:
+            is_under_nsys = bool(os.environ.get("NSYS_PROFILING_SESSION_ID"))
+            # Under nsys (non-empty NSYS_PROFILING_SESSION_ID), wrap each non-warmup iteration
+            # with cudaProfilerStart() and cudaProfilerStop(). This allows the user to run
+            # ```shell
+            # nsys profile --capture-range=cudaProfilerApi --capture-range-end=repeat:<N> ...
+            # ```
+            # to record only the non-warmup iterations.
+            if is_under_nsys:
                 torch.cuda.cudart().cudaProfilerStart()
             iter_metrics = self.measure_inference_step(input_ids, past_key_values, self.config.output_length)
-            if self.config.profile:
+            if is_under_nsys:
                 torch.cuda.cudart().cudaProfilerStop()
 
             all_metrics.append(iter_metrics)
@@ -748,11 +754,6 @@ def parse_args() -> argparse.Namespace:
         action="store_true",
         help="let nvfuser take care of linear and matmul, note that this might fail with distributed run. See: https://github.com/NVIDIA/Fuser/issues/4507",
     )
-    parser.add_argument(
-        "--profile",
-        action="store_true",
-        help="Wrap each non-warmup iteration with cudaProfilerStart() and cudaProfilerStop(). This allows us to run `nsys profile --capture-range=cudaProfilerApi --capture-range-end=repeat:<N> ... --profile` to record only the non-warmup iterations.",
-    )
 
     parser.add_argument(
         "--thunder-trace",
@@ -801,7 +802,6 @@ def main():
         enable_nv_linear=args.enable_nv_linear,
         disable_moe_replacement=args.disable_moe_replacement,
         attn_implementation=args.attn_implementation,
-        profile=args.profile,
         thunder_cache=args.thunder_cache,
         enable_thunder_cudagraph=args.enable_thunder_cudagraph,
     )