Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
214 changes: 214 additions & 0 deletions crates/bevy_render/src/error_handler.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
use alloc::sync::Arc;
use bevy_ecs::{
resource::Resource,
world::{Mut, World},
};
use std::sync::Mutex;
use wgpu::ErrorSource;
use wgpu_types::error::ErrorType;

use crate::{
insert_future_resources,
render_resource::PipelineCache,
renderer::{RenderDevice, WgpuWrapper},
settings::RenderCreation,
FutureRenderResources, RenderStartup,
};

/// The response the renderer should take after a [`RenderError`],
/// as returned by the callback stored in a [`RenderErrorHandler`].
pub enum RenderErrorPolicy {
/// Pretends nothing happened and continues rendering.
/// This discards the error after logging it to console.
Ignore,
/// Keeps the app alive, but stops rendering further.
/// This keeps the error state, and will continue polling the [`RenderErrorHandler`]
/// every frame until some other policy is returned.
StopRendering,
/// Attempt renderer recovery with the given [`RenderCreation`].
/// The renderer is put into a reinitializing state until the new render resources are ready.
Recover(RenderCreation),
}

/// Determines what [`RenderErrorPolicy`] should be used to respond to a given [`RenderError`].
///
/// The handler has access to both the main world and the render world in that order.
/// By the time this is invoked, the error has already been logged. The error is provided
/// so that the handler can decide how to respond to it appropriately. Not all errors
/// are equally severe: validation errors may be ignored for example, while device lost errors
/// require recovery to continue rendering.
///
/// The [`Default`] handler returns [`RenderErrorPolicy::Ignore`] for every error.
#[derive(Resource)]
pub struct RenderErrorHandler(
/// Callback receiving the error, the main world and the render world (in that order),
/// and returning the policy to apply.
pub for<'a> fn(&'a RenderError, &'a mut World, &'a mut World) -> RenderErrorPolicy,
);

impl RenderErrorHandler {
    /// Asks the stored callback how to respond to `error`, then applies the returned
    /// policy by updating the [`RenderState`] resource of the render world.
    ///
    /// # Panics
    ///
    /// Panics when recovery is requested and `insert_future_resources` returns `false`.
    fn handle(&self, error: &RenderError, main_world: &mut World, render_world: &mut World) {
        let decide = self.0;
        let policy = decide(error, main_world, render_world);

        let next_state = match policy {
            // Leave the render world untouched; no new state is inserted.
            RenderErrorPolicy::StopRendering => return,
            // Pretend that didn't happen.
            RenderErrorPolicy::Ignore => RenderState::Ready,
            RenderErrorPolicy::Recover(render_creation) => {
                assert!(insert_future_resources(&render_creation, main_world));
                RenderState::Reinitializing
            }
        };
        render_world.insert_resource(next_state);
    }
}

impl Default for RenderErrorHandler {
fn default() -> Self {
// This is what we've always done historically,
// but we could choose a new default once recovery works better.
Self(|_, _, _| RenderErrorPolicy::Ignore)
}
}

/// An error encountered during rendering.
///
/// Built from what the wgpu device callbacks captured: either a device-lost report
/// or an uncaptured wgpu error.
#[derive(Debug)]
pub struct RenderError {
/// The category of the error (device lost, out of memory, validation or internal).
pub ty: ErrorType,
/// The message that accompanied the error.
/// Empty for out-of-memory errors, which carry no description.
pub description: String,
/// The underlying wgpu error source, or `None` for device-lost errors.
pub source: Option<WgpuWrapper<ErrorSource>>,
}

/// The current state of the renderer.
///
/// Stored as a resource in the render world and advanced by `update_state`.
#[derive(Resource, Debug)]
pub(crate) enum RenderState {
/// Just started, [`crate::RenderStartup`] will run in this state.
/// Once that schedule has run, the state moves to [`RenderState::Ready`].
Initializing,
/// Everything is okay and we are rendering stuff every frame.
Ready,
/// An error was encountered, and we may decide how to handle it.
/// The [`RenderErrorHandler`] is consulted while in this state.
Errored(RenderError),
/// We are recreating the render context after an error to recover.
/// Moves to [`RenderState::Initializing`] once the new render resources are available.
Reinitializing,
}

/// Resource to allow polling wgpu error handlers.
///
/// Both slots are shared with the callbacks registered on the device in
/// [`DeviceErrorHandler::new`] and are emptied by [`DeviceErrorHandler::poll`].
#[derive(Resource)]
pub(crate) struct DeviceErrorHandler {
/// Reason and message passed to the device-lost callback, if it fired since the last poll.
device_lost: Arc<Mutex<Option<(wgpu::DeviceLostReason, String)>>>,
/// The first uncaptured error reported since the last poll; later ones are only logged.
uncaptured: Arc<Mutex<Option<WgpuWrapper<wgpu::Error>>>>,
}

impl DeviceErrorHandler {
/// Creates and registers error handlers on the given device and stores them to later be polled.
pub(crate) fn new(device: &RenderDevice) -> Self {
let device_lost = Arc::new(Mutex::new(None));
let uncaptured = Arc::new(Mutex::new(None));
{
// scoped clone to move into closures
let device_lost = device_lost.clone();
let uncaptured = uncaptured.clone();
let device = device.wgpu_device();
// we log errors as soon as they are captured so they stay chronological in logs
// and only keep the first error, as it often causes other errors downstream
device.set_device_lost_callback(move |reason, str| {
bevy_log::error!("Caught DeviceLost error: {reason:?} {str}");
assert!(device_lost.lock().unwrap().replace((reason, str)).is_none());
});
device.on_uncaptured_error(Arc::new(move |e| {
bevy_log::error!("Caught rendering error: {e}");
uncaptured
.lock()
.unwrap()
.get_or_insert(WgpuWrapper::new(e));
}));
}
Self {
device_lost,
uncaptured,
}
}

/// Checks to see if any errors have been caught, and returns an appropriate `RenderState`
pub(crate) fn poll(&self) -> Option<RenderError> {
// Device lost is more important so we let it take precedence; every error gets logged anyways.
if let Some((_, description)) = self.device_lost.lock().unwrap().take() {
return Some(RenderError {
ty: ErrorType::DeviceLost,
description,
source: None,
});
}
if let Some(error) = self.uncaptured.lock().unwrap().take() {
let (ty, description, source) = match error.into_inner() {
wgpu::Error::OutOfMemory { source } => {
(ErrorType::OutOfMemory, "".to_string(), source)
}
wgpu::Error::Validation {
source,
description,
} => (ErrorType::Validation, description, source),
wgpu::Error::Internal {
source,
description,
} => (ErrorType::Internal, description, source),
};
return Some(RenderError {
ty,
description,
source: Some(WgpuWrapper::new(source)),
});
}
None
}
}

/// Updates the state machine that handles the renderer and device lifecycle.
/// Polls the [`DeviceErrorHandler`] and fires the [`RenderErrorHandler`] if needed.
///
/// Runs [`crate::RenderStartup`] after every time a [`RenderDevice`] is acquired.
///
/// We need both the main and render world to properly handle errors, so we wedge ourselves into [extract](bevy_app::SubApp::set_extract).
///
/// # Panics
///
/// Panics if the render world has no [`RenderState`] resource, or if another resource
/// or lock that the current state needs cannot be obtained.
pub(crate) fn update_state(main_world: &mut World, render_world: &mut World) {
// A newly polled error replaces whatever state was stored before.
if let Some(error) = render_world.resource::<DeviceErrorHandler>().poll() {
render_world.insert_resource(RenderState::Errored(error));
};

// Remove the render state so we can provide both worlds to the `RenderErrorHandler`.
let state = render_world.remove_resource::<RenderState>().unwrap();

match &state {
RenderState::Initializing => {
// Run the startup schedule, then go straight to `Ready`.
render_world.run_schedule(RenderStartup);
render_world.insert_resource(RenderState::Ready);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we really just instantly transition from Initializing into Ready?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe the transition into Ready should be done by RenderStartup itself (in case it has its own fallibility).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think its better to keep all the state-machine-y things in the function together

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm fine with keeping this together for now, but I suspect we may need to refactor this later.

}
RenderState::Ready => {
// all is well
}
RenderState::Errored(error) => {
// Scope the handler out of the main world so the callback can be handed
// both worlds mutably; the handler may insert a new `RenderState`.
main_world.resource_scope(|main_world, error_handler: Mut<RenderErrorHandler>| {
error_handler.handle(error, main_world, render_world);
});
}
RenderState::Reinitializing => {
// Only proceed once the shared slot holds new render resources; until then the
// state is put back unchanged below and this check runs again on the next call.
if let Some(render_resources) = main_world
.get_resource::<FutureRenderResources>()
.unwrap()
.clone()
.lock()
.unwrap()
.take()
{
// Read the setting from the existing pipeline cache and pass it on to `unpack_into`.
let synchronous_pipeline_compilation = render_world
.resource::<PipelineCache>()
.synchronous_pipeline_compilation;
render_resources.unpack_into(
main_world,
render_world,
synchronous_pipeline_compilation,
);
// Startup runs again for the new resources on the next call.
render_world.insert_resource(RenderState::Initializing);
}
}
}

// Put the state back if we didn't set a new one
if render_world.get_resource::<RenderState>().is_none() {
render_world.insert_resource(state);
}
}
56 changes: 41 additions & 15 deletions crates/bevy_render/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ pub mod batching;
pub mod camera;
pub mod diagnostic;
pub mod erased_render_asset;
pub mod error_handler;
pub mod extract_component;
pub mod extract_instances;
mod extract_param;
Expand Down Expand Up @@ -76,6 +77,7 @@ pub use extract_param::Extract;

use crate::{
camera::CameraPlugin,
error_handler::{RenderErrorHandler, RenderState},
gpu_readback::GpuReadbackPlugin,
mesh::{MeshRenderAssetPlugin, RenderMesh},
render_asset::prepare_assets,
Expand All @@ -95,7 +97,9 @@ use bevy_ecs::{
prelude::*,
schedule::{ScheduleBuildSettings, ScheduleLabel},
};
use bevy_platform::time::Instant;
use bevy_shader::{load_shader_library, Shader, ShaderLoader};
use bevy_time::TimeSender;
use bevy_utils::prelude::default;
use bevy_window::{PrimaryWindow, RawHandleWrapperHolder};
use bitflags::bitflags;
Expand Down Expand Up @@ -190,7 +194,10 @@ pub enum RenderSystems {
PostCleanup,
}

/// The startup schedule of the [`RenderApp`]
/// The startup schedule of the [`RenderApp`].
/// This can potentially run multiple times, and not on a fresh render world.
/// Every time a new [`RenderDevice`](renderer::RenderDevice) is acquired,
/// this schedule runs to initialize any gpu resources needed for rendering on it.
#[derive(ScheduleLabel, Debug, Hash, PartialEq, Eq, Clone, Default)]
pub struct RenderStartup;

Expand Down Expand Up @@ -262,6 +269,11 @@ pub struct ExtractSchedule;
#[derive(Resource, Default, Deref, DerefMut)]
pub struct MainWorld(World);

/// The render recovery schedule. This schedule runs the [`Render`] schedule if
/// we are in [`RenderState::Ready`], and is otherwise hidden from users.
///
/// Whether or not [`Render`] ran, the system in this schedule also sends the current
/// time through the `TimeSender` resource.
#[derive(ScheduleLabel, Debug, Hash, PartialEq, Eq, Clone)]
struct RenderRecovery;

#[derive(Resource, Default, Clone, Deref)]
pub(crate) struct FutureRenderResources(Arc<Mutex<Option<RenderResources>>>);

Expand Down Expand Up @@ -303,7 +315,8 @@ impl Plugin for RenderPlugin {
diagnostic::RenderDiagnosticsPlugin,
));

app.init_resource::<RenderAssetBytesPerFrame>();
app.init_resource::<RenderAssetBytesPerFrame>()
.init_resource::<RenderErrorHandler>();
if let Some(render_app) = app.get_sub_app_mut(RenderApp) {
render_app.init_resource::<RenderAssetBytesPerFrameLimiter>();
render_app
Expand Down Expand Up @@ -403,7 +416,7 @@ unsafe fn initialize_render_app(app: &mut App) {
app.init_resource::<ScratchMainWorld>();

let mut render_app = SubApp::new();
render_app.update_schedule = Some(Render.intern());
render_app.update_schedule = Some(RenderRecovery.intern());

let mut extract_schedule = Schedule::new(ExtractSchedule);
// We skip applying any commands during the ExtractSchedule
Expand All @@ -416,9 +429,32 @@ unsafe fn initialize_render_app(app: &mut App) {

render_app
.add_schedule(extract_schedule)
.add_schedule(Schedule::new(RenderRecovery))
.add_schedule(Render::base_schedule())
.init_resource::<renderer::PendingCommandBuffers>()
.insert_resource(app.world().resource::<AssetServer>().clone())
.insert_resource(RenderState::Initializing)
.add_systems(RenderRecovery, move |world: &mut World| {
if matches!(world.resource::<RenderState>(), RenderState::Ready) {
world.run_schedule(Render);
}

// update the time and send it to the app world regardless of whether we render
let time_sender = world.resource::<TimeSender>();
if let Err(error) = time_sender.0.try_send(Instant::now()) {
match error {
bevy_time::TrySendError::Full(_) => {
panic!(
"The TimeSender channel should always be empty during render. \
You might need to add the bevy::core::time_system to your app."
);
}
bevy_time::TrySendError::Disconnected(_) => {
// ignore disconnected errors, the main world probably just got dropped during shutdown
}
}
}
})
.add_systems(ExtractSchedule, PipelineCache::extract_shaders)
.add_systems(
Render,
Expand All @@ -433,18 +469,8 @@ unsafe fn initialize_render_app(app: &mut App) {
),
);

// We want the closure to have a flag to only run the RenderStartup schedule once, but the only
// way to have the closure store this flag is by capturing it. This variable is otherwise
// unused.
let mut should_run_startup = true;
render_app.set_extract(move |main_world, render_world| {
if should_run_startup {
// Run the `RenderStartup` if it hasn't run yet. This does mean `RenderStartup` blocks
// the rest of the app extraction, but this is necessary since extraction itself can
// depend on resources initialized in `RenderStartup`.
render_world.run_schedule(RenderStartup);
should_run_startup = false;
}
render_app.set_extract(|main_world, render_world| {
error_handler::update_state(main_world, render_world);

{
#[cfg(feature = "trace")]
Expand Down
2 changes: 1 addition & 1 deletion crates/bevy_render/src/render_resource/pipeline_cache.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ pub struct PipelineCache {
global_shader_defs: Vec<ShaderDefVal>,
/// If `true`, disables asynchronous pipeline compilation.
/// This has no effect on macOS, wasm, or without the `multi_threaded` feature.
synchronous_pipeline_compilation: bool,
pub(crate) synchronous_pipeline_compilation: bool,
}

impl PipelineCache {
Expand Down
15 changes: 0 additions & 15 deletions crates/bevy_render/src/renderer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,7 @@ use bevy_derive::{Deref, DerefMut};
use bevy_ecs::schedule::ScheduleLabel;
use bevy_ecs::{prelude::*, system::SystemState};
use bevy_log::{debug, info, info_span, warn};
use bevy_platform::time::Instant;
use bevy_render::camera::ExtractedCamera;
use bevy_time::TimeSender;
use bevy_window::RawHandleWrapperHolder;
use wgpu::{
Adapter, AdapterInfo, Backends, DeviceType, Instance, Queue, RequestAdapterOptions, Trace,
Expand Down Expand Up @@ -93,19 +91,6 @@ pub fn render_system(
}

crate::view::screenshot::collect_screenshots(world);

// update the time and send it to the app world
let time_sender = world.resource::<TimeSender>();
if let Err(error) = time_sender.0.try_send(Instant::now()) {
match error {
bevy_time::TrySendError::Full(_) => {
panic!("The TimeSender channel should always be empty during render. You might need to add the bevy::core::time_system to your app.");
}
bevy_time::TrySendError::Disconnected(_) => {
// ignore disconnected errors, the main world probably just got dropped during shutdown
}
}
}
}

/// This queue is used to enqueue tasks for the GPU to execute asynchronously.
Expand Down
Loading