engagement functions

HernandoMV · HernandoMV · commit b6ba7e60d0ae · 2025-07-16T15:20:40.000+02:00
diff --git a/lecilab_behavior_analysis/utils.py b/lecilab_behavior_analysis/utils.py
@@ -825,3 +825,83 @@ def filter_variables_for_model(dic_fit:dict, X:list, y:str, max_lag=None, tau=No
 
     return corr_mat_list, norm_contribution_df
 
+
+def find_next_end_task_time_in_events(events_df: pd.DataFrame, date: str, subject: str) -> Tuple[Union[str, None], Union[float, None]]:
+    """
+    Find the end task time in the events dataframe for a given date.
+    """
+    # filter events
+    filtered_events = events_df[events_df['description'] == "The subject has returned home."]
+    
+    # get the first event after the given date
+    end_event = filtered_events[filtered_events['date'] > date]
+    # if there are no events after the given date, return None
+    if end_event.empty:
+        print(f"No end event found after date {date}.")
+        return "No date", None
+    # calculate the duration from the given date to the end event
+    duration = (pd.to_datetime(end_event['date'].iloc[0]) - pd.to_datetime(date)).total_seconds()
+    # get the subject as well
+    subject_of_event = end_event['subject'].iloc[0]
+
+    if subject_of_event != subject:
+        print(f"Subject mismatch for {date}: expected {subject}, found {subject_of_event}.")
+        return None, None
+
+    # return the date of the first event
+    return end_event['date'].iloc[0], duration
+
+
+def get_session_box_usage(session_df: pd.DataFrame, session_duration_df: pd.DataFrame) -> pd.DataFrame:
+
+    if session_df.date.unique().size != 1:
+        raise ValueError("Session dataframe must contain data for a single date.")
+
+    #TODO: do the column checker
+
+    date = session_df.date.unique()[0]
+    subject = session_df.subject.unique()[0]
+    session_duration = session_duration_df[session_duration_df['date'] == date].duration.values[0]
+    time_to_complete_first_trial = session_df.iloc[0].trial_duration
+    start_of_first_trial = session_df.iloc[0].TRIAL_START
+    last_trial_completed_time = session_df.iloc[-1].TRIAL_END - start_of_first_trial
+    time_to_exit_box = session_duration - last_trial_completed_time
+    # add accuracy as well
+    accuracy = session_df['correct'].mean() * 100
+
+    # add the time of engagement and disengagement, removing the first trial
+    session_df = session_df.iloc[1:]  # remove the first trial for engagement calculation
+    engaged_time = session_df[session_df['engaged'] == True]['trial_duration'].sum()
+    disengaged_time = session_df[session_df['engaged'] == False]['trial_duration'].sum()
+
+    unaccounted_time = session_duration - (time_to_complete_first_trial + time_to_exit_box +
+                                           engaged_time + disengaged_time)
+    
+    total_session_time = time_to_complete_first_trial + time_to_exit_box + engaged_time + disengaged_time + unaccounted_time
+
+    return pd.DataFrame({
+        "date": [date] * 5,
+        "subject": [subject] * 5,
+        "time_type": [
+            "time_to_complete_first_trial",
+            "time_to_exit_box",
+            "engaged_time",
+            "disengaged_time",
+            "unaccounted_time"
+        ],
+        "absolute_time": [
+            time_to_complete_first_trial,
+            time_to_exit_box,
+            engaged_time,
+            disengaged_time,
+            unaccounted_time
+        ],
+        "percentage_of_time": [
+            time_to_complete_first_trial / total_session_time * 100,
+            time_to_exit_box / total_session_time * 100,
+            engaged_time / total_session_time * 100,
+            disengaged_time / total_session_time * 100,
+            unaccounted_time / total_session_time * 100
+        ],
+        "accuracy": [accuracy] * 5,
+    })