@@ -825,3 +825,83 @@ def filter_variables_for_model(dic_fit:dict, X:list, y:str, max_lag=None, tau=No
825825
826826 return corr_mat_list , norm_contribution_df
827827
828+
def find_next_end_task_time_in_events(events_df: pd.DataFrame, date: str, subject: str) -> Tuple[Union[str, None], Union[float, None]]:
    """
    Find the next "returned home" event that follows a given date.

    Only rows whose ``description`` equals "The subject has returned home."
    and whose ``date`` compares greater than ``date`` are considered. Among
    those, the chronologically earliest one is used, regardless of the row
    order of ``events_df``.

    Args:
        events_df: Events table; the columns ``description``, ``date`` and
            ``subject`` are read.
        date: Reference date; the returned event is strictly after it.
        subject: Subject expected to own the selected event.

    Returns:
        A ``(end_date, duration)`` tuple:

        * ``(<value of the event's 'date' cell>, <seconds from date to event>)``
          when an event is found and its subject matches;
        * ``("No date", None)`` when no matching event follows ``date``;
        * ``(None, None)`` when the selected event belongs to another subject.
    """
    # only "returned home" events mark the end of a task
    home_events = events_df[events_df['description'] == "The subject has returned home."]

    # keep the events located after the given date
    later_events = home_events[home_events['date'] > date]
    if later_events.empty:
        print(f"No end event found after date { date } .")
        return "No date", None

    # Select the earliest event by parsed timestamp rather than by row
    # position, so an unsorted events table still yields the *next* event.
    # argmin returns the first position on ties, which keeps row order there.
    earliest_pos = pd.to_datetime(later_events['date']).argmin()
    end_date = later_events['date'].iloc[earliest_pos]

    # duration from the given date to the end event, in seconds
    duration = (pd.to_datetime(end_date) - pd.to_datetime(date)).total_seconds()

    # the event must belong to the requested subject
    subject_of_event = later_events['subject'].iloc[earliest_pos]
    if subject_of_event != subject:
        print(f"Subject mismatch for { date } : expected { subject } , found { subject_of_event } .")
        return None, None

    return end_date, duration
853+
854+
def get_session_box_usage(session_df: pd.DataFrame, session_duration_df: pd.DataFrame) -> pd.DataFrame:
    """
    Split the duration of one session into five time categories.

    The categories are: the first trial's duration, the time between the
    end of the last trial and the end of the session, the summed duration
    of engaged trials, the summed duration of disengaged trials (both
    excluding the first trial), and whatever remains of the session
    duration after those four.

    Args:
        session_df: Trials of a single date. The columns ``date``,
            ``subject``, ``trial_duration``, ``TRIAL_START``, ``TRIAL_END``,
            ``correct`` and ``engaged`` are read.
        session_duration_df: Table with ``date`` and ``duration`` columns;
            the first row whose ``date`` equals the session date supplies
            the session duration. NOTE(review): durations are presumably in
            the same unit as the trial times - confirm with the caller.

    Returns:
        A five-row dataframe (one row per category) with the columns
        ``date``, ``subject``, ``time_type``, ``absolute_time``,
        ``percentage_of_time`` and ``accuracy`` (mean of ``correct`` times
        100, repeated on every row).

    Raises:
        ValueError: If ``session_df`` does not hold exactly one distinct date.
    """
    session_dates = session_df.date.unique()
    if session_dates.size != 1:
        raise ValueError("Session dataframe must contain data for a single date.")

    #TODO: do the column checker

    date = session_dates[0]
    subject = session_df.subject.unique()[0]

    # session length: first duration entry recorded for this date
    same_date_rows = session_duration_df[session_duration_df['date'] == date]
    session_duration = same_date_rows.duration.values[0]

    first_trial = session_df.iloc[0]
    time_to_complete_first_trial = first_trial.trial_duration
    start_of_first_trial = first_trial.TRIAL_START

    # time left in the session once the last trial has ended
    last_trial_completed_time = session_df.iloc[-1].TRIAL_END - start_of_first_trial
    time_to_exit_box = session_duration - last_trial_completed_time

    # accuracy is computed over every trial, the first one included
    accuracy = session_df['correct'].mean() * 100

    # engagement is only evaluated from the second trial onwards
    later_trials = session_df.iloc[1:]
    engaged_flags = later_trials['engaged']
    engaged_time = later_trials[engaged_flags.eq(True)]['trial_duration'].sum()
    disengaged_time = later_trials[engaged_flags.eq(False)]['trial_duration'].sum()

    accounted_time = time_to_complete_first_trial + time_to_exit_box + engaged_time + disengaged_time
    unaccounted_time = session_duration - accounted_time
    total_session_time = accounted_time + unaccounted_time

    time_types = [
        "time_to_complete_first_trial",
        "time_to_exit_box",
        "engaged_time",
        "disengaged_time",
        "unaccounted_time",
    ]
    absolute_times = [
        time_to_complete_first_trial,
        time_to_exit_box,
        engaged_time,
        disengaged_time,
        unaccounted_time,
    ]
    n_rows = len(time_types)

    return pd.DataFrame({
        "date": [date] * n_rows,
        "subject": [subject] * n_rows,
        "time_type": time_types,
        "absolute_time": absolute_times,
        "percentage_of_time": [value / total_session_time * 100 for value in absolute_times],
        "accuracy": [accuracy] * n_rows,
    })
0 commit comments