Skip to content
Merged

EFPI #53

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 30 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,11 @@ The **unravelsports** package aims to aid researchers, analysts and enthusiasts
This package currently supports:
- ⚽ 🏈 [**Polars DataFrame Conversion**](#polars-dataframes)
- ⚽ 🏈 [**Graph Neural Network**](#graph-neural-networks) Training, Graph Conversion and Prediction <small>
[[Bekkers & Sahasrabudhe (2023)](https://arxiv.org/pdf/2411.17450)]</small>
[[🔗 Bekkers & Sahasrabudhe (2023)](https://arxiv.org/pdf/2411.17450)]</small>
- ⚽ [**Pressing Intensity**](#pressing-intensity)
<small>[[Bekkers (2024)](https://arxiv.org/pdf/2501.04712)]</small>
<small>[[🔗 Bekkers (2024)](https://arxiv.org/pdf/2501.04712)]</small>
- ⚽ [**Formation and Position Identification (EFPI)**](#formation-and-position-identification)
<small>[[🔗 Bekkers (2025)](https://arxiv.org/pdf/2506.23843)]</small>

🌀 Features
-----
Expand All @@ -28,7 +30,7 @@ This package currently supports:

⚽🏈 **Convert Tracking Data** into [Polars DataFrames](https://pola.rs/) for rapid data conversion and data processing.

⚽ For soccer we rely on [Kloppy](https://kloppy.pysport.org/) and as such we support _Sportec_$^1$, _SkillCorner_$^1$, _PFF_$^{1, 2}$, _Metrica_$^1$, _StatsPerform_, _Tracab (CyronHego)_ and _SecondSpectrum_ tracking data.
⚽ For soccer we rely on [Kloppy](https://kloppy.pysport.org/) and as such we support Sportec, SkillCorner, PFF / GradientSports, Metrica, StatsPerform, Tracab (CyronHego), SecondSpectrum, HawkEye and Signality tracking data.
```python
from unravel.soccer import KloppyPolarsDataset

Expand All @@ -48,9 +50,6 @@ kloppy_polars_dataset = KloppyPolarsDataset(
| 4 | 1 | 0 days 00:00:00 | 10000 | alive | DFL-OBJ-0001HW | -46.26 | 0.08 | 0 | DFL-CLU-000005 | GK | DFL-MAT-J03WPY | 0.357 | 0.071 | 0 | 0.364 | 0 | 0 | 0 | 0 | DFL-CLU-00000P | False |


$^1$ <small>Open data available through kloppy.</small>

$^2$ <small>Currently unreleased in kloppy, only available through kloppy master branch. [Click here for World Cup 2022 Dataset](https://www.blog.fc.pff.com/blog/enhanced-2022-world-cup-dataset)</small>

🏈 For American Football we use [BigDataBowl Data](https://www.kaggle.com/competitions/nfl-big-data-bowl-2025/data) directly.

Expand Down Expand Up @@ -86,6 +85,8 @@ converter = SoccerGraphConverter(
)
```

---

### **Pressing Intensity**

Compute [**Pressing Intensity**](https://arxiv.org/abs/2501.04712) for a whole game (or segment) of Soccer tracking data.
Expand Down Expand Up @@ -113,6 +114,29 @@ model.fit(

![1. FC Köln vs. FC Bayern München (May 27th 2023)](assets/gif/preview.gif)

---

### **Formation and Position Identification**

Compute [Elastic Formation and Position Identification, **EFPI**](https://arxiv.org/pdf/2506.23843) for individual frames, possessions, periods or specific time intervals for Soccer.

For all values accepted by the `every` parameter, see the [Polars documentation for `group_by_dynamic`](https://docs.pola.rs/api/python/stable/reference/dataframe/api/polars.DataFrame.group_by_dynamic.html).

```python
from unravel.soccer import EFPI

model = EFPI(dataset=kloppy_polars_dataset)
model.fit(
# Default 65 formations, or specify a subset (e.g. ["442", "433"])
formations=None,
# specific time intervals (e.g. 1m, 1m14s, 2m30s etc.), or specify "possession", "period" or "frame".
every="5m",
substitutions="drop",
change_threshold=0.1,
change_after_possession=True,
)
```

⌛ ***More to come soon...!***

🌀 Quick Start
Expand Down
270 changes: 270 additions & 0 deletions tests/test_soccer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
SoccerGraphConverter,
KloppyPolarsDataset,
PressingIntensity,
EFPI,
Constant,
Column,
Group,
Expand Down Expand Up @@ -1111,3 +1112,272 @@ def test_plot_error_wrong_extension_for_mp4(
end_timestamp=pl.duration(seconds=11, milliseconds=900),
period_id=1,
)

def test_efpi_frame_drop_0_true(
    self, kloppy_polars_sportec_dataset: KloppyPolarsDataset
):
    """Fit EFPI per frame and verify the output for frame 10018.

    The model is fitted with ``every="frame"``, ``formations=None``,
    ``substitutions="drop"`` and ``change_threshold=0.0``. The test then
    checks that no segments are produced, that the output has the expected
    column order and row count, and that selected players in frame 10018
    carry the expected position / formation / is_attacking values.
    """
    model = EFPI(
        dataset=kloppy_polars_sportec_dataset,
    )

    model = model.fit(
        formations=None,
        every="frame",
        substitutions="drop",
        change_threshold=0.0,
        change_after_possession=True,
    )

    single_frame = model.output.filter(pl.col(Column.FRAME_ID) == 10018)

    # Compare to None by identity (PEP 8): `==` may be overloaded by the
    # object on the left-hand side, `is` cannot be.
    assert model.segments is None
    assert model.output.columns == [
        Column.GAME_ID,
        Column.PERIOD_ID,
        Column.FRAME_ID,
        Column.OBJECT_ID,
        Column.TEAM_ID,
        "position",
        "formation",
        Column.BALL_OWNING_TEAM_ID,
        "is_attacking",
    ]
    assert len(model.output) == 483

    # (object_id, output column, expected value) for frame 10018, checked
    # in this order.
    expected_values = [
        ("DFL-OBJ-00008F", "position", "CB"),
        ("DFL-OBJ-00008F", "formation", "3232"),
        ("DFL-OBJ-00008F", "is_attacking", False),
        ("DFL-OBJ-002FXT", "position", "LW"),
        ("DFL-OBJ-002FXT", "formation", "31222"),
        ("DFL-OBJ-002FXT", "is_attacking", True),
        ("DFL-OBJ-0001HW", "position", "GK"),
        ("DFL-OBJ-0028FW", "position", "GK"),
    ]
    for object_id, column, expected in expected_values:
        player_rows = single_frame.filter(pl.col(Column.OBJECT_ID) == object_id)
        assert player_rows[column][0] == expected

def test_efpi_possession_drop_sg(
    self, kloppy_polars_sportec_dataset: KloppyPolarsDataset
):
    """Fit EFPI per possession with the "shaw-glickman" formations.

    Checks that a single-row segments DataFrame with the expected columns
    is produced, that the output has the expected column order and row
    count, and that selected players in possession 1 carry the expected
    position / formation / is_attacking values.
    """
    fitted = EFPI(dataset=kloppy_polars_sportec_dataset).fit(
        formations="shaw-glickman",
        every="possession",
        substitutions="drop",
        change_threshold=0.1,
        change_after_possession=True,
    )

    segments = fitted.segments
    assert isinstance(segments, pl.DataFrame)
    assert len(segments) == 1
    assert segments.columns == [
        "possession_id",
        "n_frames",
        "start_timestamp",
        "end_timestamp",
        "start_frame_id",
        "end_frame_id",
    ]

    result = fitted.output
    assert result.columns == [
        Column.GAME_ID,
        Column.PERIOD_ID,
        Column.BALL_OWNING_TEAM_ID,
        "possession_id",
        Column.OBJECT_ID,
        Column.TEAM_ID,
        "position",
        "formation",
        "is_attacking",
    ]
    assert len(result) == 23

    single_possession = result.filter(pl.col("possession_id") == 1)

    # (object_id, output column, expected value), checked in this order.
    checks = (
        ("DFL-OBJ-00008F", "position", "CB"),
        ("DFL-OBJ-00008F", "formation", "3232"),
        ("DFL-OBJ-00008F", "is_attacking", False),
        ("DFL-OBJ-002FXT", "position", "LW"),
        ("DFL-OBJ-002FXT", "formation", "3241"),
        ("DFL-OBJ-002FXT", "is_attacking", True),
        ("DFL-OBJ-0001HW", "position", "GK"),
        ("DFL-OBJ-0028FW", "position", "GK"),
    )
    for object_id, column, expected in checks:
        player_rows = single_possession.filter(
            pl.col(Column.OBJECT_ID) == object_id
        )
        assert player_rows[column][0] == expected

def test_efpi_period_442(self, kloppy_polars_sportec_dataset: KloppyPolarsDataset):
    """Fit EFPI per period, restricted to the "442" formation.

    Checks that a single-row segments DataFrame with the expected columns
    is produced, that the output has the expected column order and row
    count, and that selected players in period 1 carry the expected
    position / formation / is_attacking values.
    """
    fitted = EFPI(dataset=kloppy_polars_sportec_dataset).fit(
        formations=["442"],
        every="period",
        substitutions="drop",
        change_threshold=0.1,
        change_after_possession=True,
    )

    segments = fitted.segments
    assert isinstance(segments, pl.DataFrame)
    assert len(segments) == 1
    assert segments.columns == [
        "period_id",
        "n_frames",
        "start_timestamp",
        "end_timestamp",
        "start_frame_id",
        "end_frame_id",
    ]

    result = fitted.output
    assert result.columns == [
        Column.GAME_ID,
        Column.PERIOD_ID,
        Column.BALL_OWNING_TEAM_ID,
        Column.OBJECT_ID,
        Column.TEAM_ID,
        "position",
        "formation",
        "is_attacking",
    ]
    assert len(result) == 23

    single_period = result.filter(pl.col("period_id") == 1)

    # (object_id, output column, expected value), checked in this order.
    checks = (
        ("DFL-OBJ-00008F", "position", "RCB"),
        ("DFL-OBJ-00008F", "formation", "442"),
        ("DFL-OBJ-00008F", "is_attacking", False),
        ("DFL-OBJ-002FXT", "position", "LM"),
        ("DFL-OBJ-002FXT", "formation", "442"),
        ("DFL-OBJ-002FXT", "is_attacking", True),
        ("DFL-OBJ-0001HW", "position", "GK"),
        ("DFL-OBJ-0028FW", "position", "GK"),
    )
    for object_id, column, expected in checks:
        player_rows = single_period.filter(pl.col(Column.OBJECT_ID) == object_id)
        assert player_rows[column][0] == expected

def test_efpi_wrong(self, kloppy_polars_sportec_dataset: KloppyPolarsDataset):
    """Expect ``fit`` to raise a polars ``PanicException`` for ``every="5mm"``.

    NOTE(review): "5mm" is presumably an invalid interval string (the
    documented examples look like "1m", "1m14s", "2m30s") -- confirm that
    this is the intended failure trigger.
    """
    import pytest
    from polars.exceptions import PanicException

    # Construct the model outside the `raises` block so the test can only
    # pass when `fit` itself raises, not when construction does.
    model = EFPI(dataset=kloppy_polars_sportec_dataset)

    with pytest.raises(PanicException):
        model.fit(
            formations=["442"],
            every="5mm",
            substitutions="drop",
            change_threshold=0.1,
            change_after_possession=True,
        )
1 change: 1 addition & 0 deletions unravel/soccer/dataset/kloppy_polars.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,7 @@ def __apply_settings(
max_player_acceleration=self._max_player_acceleration,
max_ball_acceleration=self._max_ball_acceleration,
ball_carrier_threshold=self._ball_carrier_threshold,
frame_rate=self.kloppy_dataset.metadata.frame_rate,
)

def load(
Expand Down
1 change: 1 addition & 0 deletions unravel/soccer/models/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
from .pressing_intensity import *
from .formations.efpi import EFPI
from .utils import *
Empty file.
Loading