Merge pull request #188 from TorchTrade/fix/187-fractional-rebalancing

BY571 · web-flow · commit 48b0a4069e5a · 2026-02-11T14:07:24.000+01:00
Fix fractional position sizing rebalancing on repeated actions
diff --git a/tests/envs/offline/test_sequential.py b/tests/envs/offline/test_sequential.py
@@ -803,6 +803,106 @@ def test_check_env_specs_passes(self, unified_env):
         from torchrl.envs.utils import check_env_specs
         check_env_specs(unified_env)
 
+    @pytest.mark.parametrize("action_levels,leverage,repeat_action_idx", [
+        ([0, 1], 1, 1),            # Spot: repeat buy (long)
+        ([-1, 0, 1], 5, 2),        # Futures: repeat long
+        ([-1, 0, 1], 5, 0),        # Futures: repeat short
+    ], ids=["spot-long", "futures-long", "futures-short"])
+    def test_repeated_action_does_not_rebalance(
+        self, sample_ohlcv_df, action_levels, leverage, repeat_action_idx
+    ):
+        """Repeating the same action should hold, not rebalance.
+
+        Regression test for #187: fractional position sizing recalculated
+        target from drifting portfolio_value, causing constant-leverage
+        rebalancing (close_partial / increase) when the agent repeated
+        the same action. Runs fee- and slippage-free for up to 50 steps.
+        """
+        config = SequentialTradingEnvConfig(
+            action_levels=action_levels,
+            leverage=leverage,
+            initial_cash=1000,
+            transaction_fee=0.0,
+            slippage=0.0,
+            time_frames=[TimeFrame(1, TimeFrameUnit.Minute)],
+            window_sizes=[10],
+            execute_on=TimeFrame(1, TimeFrameUnit.Minute),
+        )
+        env = SequentialTradingEnv(sample_ohlcv_df, config, simple_feature_fn)
+        td = env.reset()  # reset output is cloned below to carry the first action
+
+        # Step 1: open position by submitting the parametrized action index
+        action_td = td.clone()
+        action_td["action"] = torch.tensor(repeat_action_idx)
+        td = env.step(action_td)
+
+        position_after_open = env.position.position_size  # baseline for later comparisons
+        assert position_after_open != 0, "Position should have opened"
+
+        # Steps 2-50: repeat same action — position size must not change
+        trades_executed = 0  # counts steps whose size differs from the baseline
+        for _ in range(49):
+            action_td = td["next"].clone()  # continue from the previous step's "next" entry
+            action_td["action"] = torch.tensor(repeat_action_idx)
+            td = env.step(action_td)
+            if td["next"]["done"].item():  # stop once the env reports done
+                break  # NOTE(review): the size check below is skipped for this final step
+            if env.position.position_size != position_after_open:
+                trades_executed += 1
+
+        assert trades_executed == 0, (
+            f"Repeating the same action should hold, not rebalance. "
+            f"Position changed {trades_executed} times (issue #187)"
+        )
+        env.close()  # NOTE(review): not reached if an assert above fails; consider try/finally
+
+    @pytest.mark.parametrize("action_levels,leverage,open_idx,close_idx", [
+        ([0, 1], 1, 1, 0),            # Spot: long then sell
+        ([-1, 0, 1], 5, 2, 1),        # Futures: long then close
+        ([-1, 0, 1], 5, 0, 1),        # Futures: short then close
+    ], ids=["spot-close", "futures-close-long", "futures-close-short"])
+    def test_action_change_after_repeated_holds_still_executes(
+        self, sample_ohlcv_df, action_levels, leverage, open_idx, close_idx
+    ):
+        """Changing action after repeated holds must still execute.
+
+        Regression test for #187: ensures the _prev_action_value guard
+        does not accidentally lock agents into positions they cannot exit.
+        """
+        config = SequentialTradingEnvConfig(
+            action_levels=action_levels,
+            leverage=leverage,
+            initial_cash=1000,
+            transaction_fee=0.0,
+            slippage=0.0,
+            time_frames=[TimeFrame(1, TimeFrameUnit.Minute)],
+            window_sizes=[10],
+            execute_on=TimeFrame(1, TimeFrameUnit.Minute),
+        )
+        env = SequentialTradingEnv(sample_ohlcv_df, config, simple_feature_fn)
+        td = env.reset()  # reset output is cloned below to carry the first action
+
+        # Open position and repeat for 10 steps
+        action_td = td.clone()
+        action_td["action"] = torch.tensor(open_idx)
+        td = env.step(action_td)
+        assert env.position.position_size != 0, "Position should have opened"
+
+        for _ in range(10):  # no "done" check; assumes the episode outlasts 12 steps — TODO confirm
+            action_td = td["next"].clone()  # continue from the previous step's "next" entry
+            action_td["action"] = torch.tensor(open_idx)
+            td = env.step(action_td)
+
+        # Now close — must actually execute
+        action_td = td["next"].clone()
+        action_td["action"] = torch.tensor(close_idx)  # differs from open_idx in every parametrized case
+        td = env.step(action_td)
+
+        assert env.position.position_size == 0, (
+            "Position should have closed after action change (issue #187)"
+        )
+        env.close()  # NOTE(review): not reached if an assert above fails; consider try/finally
+
 
 # ============================================================================
 # PER-TIMEFRAME FEATURE PROCESSING TESTS (Issue #177)
diff --git a/torchtrade/envs/offline/sequential.py b/torchtrade/envs/offline/sequential.py
@@ -458,6 +458,7 @@ def _reset_position_state(self):
         self.unrealized_pnl = 0.0
         self.unrealized_pnl_pct = 0.0
         self.liquidation_price = 0.0
+        self._prev_action_value = None
 
     def _step(self, tensordict: TensorDictBase) -> TensorDictBase:
         """Execute one environment step."""
@@ -600,6 +601,12 @@ def _execute_fractional_action(self, action_value: float, execution_price: float
         Returns:
             trade_info: Dict with execution details
         """
+        # If action hasn't changed and we already have a position, just hold
+        if action_value == self._prev_action_value and self.position.position_size != 0:
+            self.position.hold_counter += 1
+            return {"executed": False, "side": None, "fee_paid": 0.0, "liquidated": False}
+        self._prev_action_value = action_value
+
         # Calculate target position from action value
         target_position_size, target_notional, target_side = (
             self._calculate_fractional_position(action_value, execution_price)