weitse-hsu
diff --git a/‎ensemble_md/analysis/analyze_free_energy.py‎
Lines changed: 69 additions & 11 deletions b/‎ensemble_md/analysis/analyze_free_energy.py‎
Lines changed: 69 additions & 11 deletions
diff --git a/‎ensemble_md/analysis/analyze_traj.py‎
Lines changed: 91 additions & 2 deletions b/‎ensemble_md/analysis/analyze_traj.py‎
Lines changed: 91 additions & 2 deletions
@@ -189,6 +189,8 @@ def _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent=None, err_ty
         A list of lists free energy differences between adjacent states for all replicas.
     state_ranges : list
         A list of lists of showing the state indices sampled by each replica.
+    n_tot : int
+        Number of lambda states
     df_err_adjacent : list, Optional
         A list of lists of uncertainties corresponding to the values of :code:`df_adjacent`. Notably, if
         :code:`df_err_adjacent` is :code:`None`, simple means will be used. Otherwise, inverse-variance weighted
@@ -247,7 +249,47 @@ def _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent=None, err_ty
     return df, df_err, overlap_bool
 
 
-def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="propagate", n_bootstrap=None, seed=None):
+def _calculate_df(estimators):
+    """
+    An internal function used in :func:`calculate_free_energy` to get the free energy difference between the
+    first and the last states, along with its uncertainty, from the first estimator in the input list.
+
+    Parameters
+    ----------
+    estimators : list
+        A list of estimators fitted to the input data. Only the first estimator (:code:`estimators[0]`) is
+        used here, and its attributes :code:`delta_f_` and :code:`d_delta_f_` are read as pandas DataFrames
+        indexed by state along both axes. In our code, these estimators come from the function
+        :func:`_apply_estimators`.
+
+    Returns
+    -------
+    df : float
+        The free energy difference between the first and the last states.
+    df_err : float
+        The uncertainty corresponding to :code:`df`.
+
+    See also
+    --------
+    :func:`calculate_free_energy`
+    """
+    estimator = estimators[0]
+
+    # Read the (first state, last state) element by position instead of relabeling the axes with evenly spaced
+    # lambda values. The DataFrames belong to the estimator, so renaming their index/columns here would
+    # silently modify the estimator that the caller keeps using.
+    df = estimator.delta_f_.iloc[0, -1]
+    df_err = estimator.d_delta_f_.iloc[0, -1]
+
+    return df, df_err
+
+
+def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="propagate", n_bootstrap=None, seed=None, MTREXEE=False):  # noqa: E501
     """
     Caculates the averaged free energy profile with the chosen method given :math:`u_{nk}` or :math:`dH/dλ` data
     obtained from all replicas of the REXEE simulation. Available methods include TI, BAR, and MBAR. TI
@@ -275,6 +317,8 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
     seed : int, Optional
         The random seed for bootstrapping. Only relevant when :code:`err_method` is :code:`"bootstrap"`.
         The default is :code:`None`.
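+    MTREXEE : bool, Optional
+        Whether the data comes from an MT-REXEE simulation. If :code:`True`, the free energy difference and
+        its uncertainty are obtained from :func:`_calculate_df` instead of being combined from the free energy
+        differences between adjacent states of all replicas. The default is :code:`False`.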
 
     Returns
     -------
@@ -299,10 +343,17 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
         >>> f, _, _ = analyze_free_energy.calculate_free_energy(data_list, state_ranges, "MBAR", "propagate")
     """
     n_sim = len(data)
-    n_tot = state_ranges[-1][-1] + 1
+    if MTREXEE is False:
+        n_tot = state_ranges[-1][-1] + 1
+    else:
+        n_tot = state_ranges[-1] + 1
     estimators = _apply_estimators(data, df_method)
-    df_adjacent, df_err_adjacent = _calculate_df_adjacent(estimators)
-    df, df_err, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')
+    print(estimators)
+    if MTREXEE is False:
+        df_adjacent, df_err_adjacent = _calculate_df_adjacent(estimators)
+        df, df_err, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # noqa: E501
+    else:
+        df, df_err = _calculate_df(estimators)
 
     if err_method == 'bootstrap':
         if seed is not None:
@@ -314,26 +365,33 @@ def calculate_free_energy(data, state_ranges, df_method="MBAR", err_method="prop
         for b in range(n_bootstrap):
             sampled_data = [sampled_data_all[i].iloc[b * len(data[i]):(b + 1) * len(data[i])] for i in range(n_sim)]
             bootstrap_estimators = _apply_estimators(sampled_data, df_method)
-            df_adjacent, df_err_adjacent = _calculate_df_adjacent(bootstrap_estimators)
-            df_sampled, _, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # doesn't matter what value err_type here is # noqa: E501
+            if MTREXEE is False:
+                df_adjacent, df_err_adjacent = _calculate_df_adjacent(bootstrap_estimators)
+                df_sampled, _, overlap_bool = _combine_df_adjacent(df_adjacent, state_ranges, df_err_adjacent, err_type='propagate')  # doesn't matter what value err_type here is # noqa: E501
+            else:
+                df_sampled, _ = _calculate_df(bootstrap_estimators)
             df_bootstrap.append(df_sampled)
         error_bootstrap = np.std(df_bootstrap, axis=0, ddof=1)
 
         # Replace the value in df_err with value in error_bootstrap if df_err corresponds to
         # the df between overlapping states
         for i in range(n_tot - 1):
-            if overlap_bool[i] is True:
+            if MTREXEE is True or overlap_bool[i] is True:
                 print(f'Replaced the propagated error with the bootstrapped error for states {i} and {i + 1}: {df_err[i]:.5f} -> {error_bootstrap[i]:.5f}.')  # noqa: E501
                 df_err[i] = error_bootstrap[i]
     elif err_method == 'propagate':
         pass
     else:
         raise ParameterError('Specified err_method not available.')
 
-    df.insert(0, 0)
-    df_err.insert(0, 0)
-    f = [sum(df[:(i + 1)]) for i in range(len(df))]
-    f_err = [np.sqrt(sum([x**2 for x in df_err[:(i+1)]])) for i in range(len(df_err))]
+    if MTREXEE is False:
+        df.insert(0, 0)
+        df_err.insert(0, 0)
+        f = [sum(df[:(i + 1)]) for i in range(len(df))]
+        f_err = [np.sqrt(sum([x**2 for x in df_err[:(i+1)]])) for i in range(len(df_err))]
+    else:
+        f = df
+        f_err = df_err
 
     return f, f_err, estimators
 
 
@@ -15,10 +15,10 @@
 import matplotlib.pyplot as plt
 from itertools import chain
 from matplotlib.ticker import MaxNLocator
-
 from alchemlyb.parsing.gmx import _get_headers as get_headers
 from alchemlyb.parsing.gmx import _extract_dataframe as extract_dataframe
 from ensemble_md.utils import utils
+import os
 
 
 def extract_state_traj(dhdl):
@@ -106,6 +106,7 @@ def stitch_time_series(files, rep_trajs, shifts=None, dhdl=True, col_idx=-1, sav
     # files_sorted[i] contains the dhdl/plumed output files for starting configuration i sorted
     # based on iteration indices
     files_sorted = [[] for i in range(n_configs)]
+    print(n_iter)
     for i in range(n_configs):
         for j in range(n_iter):
             files_sorted[i].append(files[rep_trajs[i][j]][j])
@@ -539,7 +540,15 @@ def plot_state_hist(trajs, state_ranges, fig_name, stack=True, figsize=None, pre
         hist, bins = np.histogram(traj, bins=np.arange(lower_bound, upper_bound + 1, 1))
         hist_data.append(hist)
     if save_hist is True:
-        np.save('hist_data.npy', hist_data)
+        if len(fig_name.split('/')) > 1:
+            dir_list = []
+            for i in fig_name.split('/')[:-1]:
+                dir_list.append(i)
+                dir_list.append('/')
+            dir_path = ''.join(dir_list)
+            np.save(f'{dir_path}/hist_data.npy', hist_data)
+        else:
+            np.save('hist_data.npy', hist_data)
 
     # Use the same bins for all histograms
     bins = bins[:-1]  # Remove the last bin edge because there are n+1 bin edges for n bins
@@ -685,6 +694,8 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
     units : str
         The units of the time.
     """
+    import pandas as pd
+
     if dt is None:
         x = np.arange(len(trajs[0]))
         units = 'step'
@@ -824,6 +835,14 @@ def plot_transit_time(trajs, N, fig_prefix=None, dt=None, folder='.'):
                     plt.savefig(f'{folder}/hist_{fig_names[t]}', dpi=600)
                 else:
                     plt.savefig(f'{folder}/{fig_prefix}_hist_{fig_names[t]}', dpi=600)
+    # Save to csv
+    sim_list, rt_list = [], []
+    for n in range(len(t_roundtrip_list)):
+        for rt in t_roundtrip_list[n]:
+            sim_list.append(n)
+            rt_list.append(rt)
+    df_rt = pd.DataFrame({'Sim': sim_list, 'Round Trip Time': rt_list})
+    df_rt.to_csv(f'{folder}/roundtrip_times.csv')
 
     return t_0k_list, t_k0_list, t_roundtrip_list, units
 
@@ -1330,3 +1349,73 @@ def get_delta_w_updates(log_file, plot=False):
         plt.savefig('delta_w_updates.png', dpi=600)
 
     return t_updates, delta_w_updates, equil
+
+
+def concat_sim_traj(working_dir, n_sim, n_iter, gro):
+    """
+    Creates, for each simulation, a trajectory that is the concatenation of the trajectories of all its
+    iterations and saves it as :code:`{working_dir}/analysis/traj/sim{rep}_concat.xtc`. Simulations for which
+    the output file already exists are skipped.
+
+    Parameters
+    ----------
+    working_dir : str
+        The path of the working directory, which contains the folders :code:`sim_*/iteration_*`.
+    n_sim : int
+        The number of simulations run.
+    n_iter : int
+        The number of iterations run.
+    gro : list
+        The paths (relative to :code:`working_dir`) of the reference structure files, indexed by the
+        simulation index. The trajectory of each iteration is superposed onto the first frame of the
+        reference structure of its simulation.
+
+    Returns
+    -------
+    None
+    """
+    import mdtraj as md
+    import os
+    from tqdm import tqdm
+
+    # Create the output directory if needed (exist_ok avoids a check-then-create race)
+    out_dir = f'{working_dir}/analysis/traj'
+    os.makedirs(out_dir, exist_ok=True)
+
+    for rep in range(n_sim):
+        out_file = f'{out_dir}/sim{rep}_concat.xtc'
+        if os.path.exists(out_file):
+            continue  # this simulation has already been processed
+
+        # The structure file of iteration 0 serves as the topology for all iterations of this simulation.
+        sim_dir = f'{working_dir}/sim_{rep}'
+        if os.path.exists(f'{sim_dir}/iteration_0/confout_backup.gro'):
+            name = 'confout_backup'
+        else:
+            name = 'confout'
+        top = f'{sim_dir}/iteration_0/{name}.gro'
+
+        gro_ref = md.load(f'{working_dir}/{gro[rep]}')
+        traj = md.load(f'{sim_dir}/iteration_0/traj.trr', top=top)
+        traj.superpose(gro_ref, frame=0)
+        for iteration in tqdm(range(1, n_iter)):
+            traj_add = md.load(f'{sim_dir}/iteration_{iteration}/traj.trr', top=top)
+            traj_add.superpose(gro_ref, frame=0)
+            # The first frame of each later iteration is dropped (presumably because it duplicates the last
+            # frame of the previous iteration -- TODO confirm).
+            traj = md.join([traj, traj_add[1:]])
+        print(traj)
+        traj.save_xtc(out_file)
+
+
+def concat_xvg(n_sim, n_iter, working_dir):
+    """
+    Concatenates, for each simulation, the :code:`dhdl.xvg` files of all iterations into a single file
+    :code:`{working_dir}/analysis/sim_{s}.xvg`. Simulations for which the output file already exists are
+    skipped.
+
+    The file of iteration 0 is copied as is. For every later iteration, the header lines (those starting with
+    :code:`#` or :code:`@`) and the first data line are dropped, and the first space-delimited field (the time)
+    of each remaining data line is rewritten so that the time keeps increasing by the time step inferred from
+    the last two lines of the file of iteration 0.
+
+    Parameters
+    ----------
+    n_sim : int
+        The number of simulations run.
+    n_iter : int
+        The number of iterations run.
+    working_dir : str
+        The path of the working directory, which contains the folders :code:`sim_*/iteration_*` and
+        the folder :code:`analysis`.
+
+    Returns
+    -------
+    None
+    """
+    for s in range(n_sim):
+        out_path = f'{working_dir}/analysis/sim_{s}.xvg'
+        if os.path.exists(out_path):
+            continue
+
+        # The context managers guarantee that every file is closed even if an error occurs midway.
+        with open(out_path, 'w') as output_file:
+            for i in range(n_iter):
+                with open(f'{working_dir}/sim_{s}/iteration_{i}/dhdl.xvg') as input_file:
+                    lines = input_file.readlines()
+
+                if i == 0:
+                    output_file.writelines(lines)
+                    # The last two lines of the first file define the current time and the time step.
+                    time_value = float(lines[-1].split(' ')[0])
+                    time_step = np.round(time_value - float(lines[-2].split(' ')[0]), 4)
+                    continue
+
+                skipped_first = False
+                for line in lines:
+                    if line.startswith(('#', '@')):
+                        continue  # header lines are only kept from iteration 0
+                    if not skipped_first:
+                        skipped_first = True  # drop the first data line of each later iteration
+                        continue
+                    time_value += time_step
+                    n = len(line.split(' ')[0])  # width of the original time field
+                    output_file.write(f'{time_value:.4f}' + line[n:])