quanted · StaceyD22 · Apr 15, 2025 · Apr 21, 2025 · May 7, 2025 · May 7, 2025
diff --git a/.github/workflows/testing-code.yml b/.github/workflows/testing-code.yml
@@ -0,0 +1,25 @@
+name: Run Unit Test via Pytest
+
+on: [push]  # triggers on every push; no branch filter is configured
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11"]  # one job is run per listed interpreter version
+
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install dependencies  # NOTE(review): pytest is only installed if requirements.txt lists it - confirm
+        run: |
+          python -m pip install --upgrade pip
+          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+      - name: Test with pytest
+        run: |
+          pytest tests
+        continue-on-error: true  # NOTE(review): a failing pytest run will not fail the job - confirm this is intended
diff --git a/app/__init_.py b/app/__init_.py
diff --git a/app/constants.py b/app/constants.py
@@ -0,0 +1,72 @@
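+# Define positive-mode adduct, negative-mode adduct, and neutral-loss lists as (label, mass offset) pairs
+# Negative mode: proton mass added to each adduct offset, since we observe Mass-(H+) and Mass+(Adduct)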
+NEG_ADDUCT_LI = [
+    ("Cl", 35.976678),
+    ("Br", 79.926161),
+    ("HCO2", 46.005477),
+    ("CH3CO2", 60.021127),
+    ("CF3CO2", 113.992862),
+]
+
+# Positive mode: proton mass subtracted from each adduct offset, since we observe Mass+(H+) and Mass+(Adduct)
+POS_ADDUCT_LI = [
+    ("Na", 21.981942),
+    ("K", 37.955882),
+    ("NH4", 17.026547),
+]
+
+NEUTRAL_LOSSES_LI = [  # (label, mass offset) pairs; negative offsets are losses
+    ("H2O", -18.010565),
+    ("2H2O", -36.02113),
+    ("3H2O", -54.031695),
+    ("4H2O", -72.04226),
+    ("5H2O", -90.052825),
+    ("NH3", -17.0265),
+    ("O", -15.99490),
+    ("CO", -29.00220),  # NOTE(review): monoisotopic CO is ~27.9949; confirm this value/label
+    ("CO2", -43.989829),
+    ("C2H4", -28.03130),
+    ("CH2O2", 46.00550),  # NOTE: this entry and those below are positive offsets (neutral gains, not losses) - TODO confirm they belong in this list
+    ("CH3COOH", 60.02110),
+    ("CH3OH", 32.02620),
+    ("CH3CN", 41.02650),
+    ("(CH3)2CHOH", 60.05810),
+]
+
+# Set to tested memory capacity of WebApp for number of features in 'adduct_matrix'
+MAX_NUM_ADDUCT_FEATURES = 12000
+
+# Column names accessed throughout app
+FEATURE_ID_COL = "Feature ID"
+DASHBOARD_SEARCH_COL = "For_Dashboard_Search"
+FORMULA_COL = "Formula"
+MASS_COL = "Mass"
+RETENTION_COL = "Retention_Time"
+IONIZATION_COL = "Ionization_Mode"
+MOLECULAR_FORMULA_COL = "MOLECULAR_FORMULA"
+
+# Format lists to test values against
+ALLOWED_BLANK_FORMATS_LIST = ["Blank", "blank", "BLANK", "MB", "Mb", "mb", "mB"]
+ACTIVE_COLUMNS_LIST = [
+    "Retention_Time",
+    "Mass",
+    "Ionization_Mode",
+    "Compound",
+]
+
+# Establish ordering of all possible front matter (tracer/no tracer, flags/no flags, etc.)
+FRONT_MATTER_ORDERING = [
+    "Ionization_Mode",
+    "Mass",
+    "Retention_Time",
+    "Compound",
+    "Tracer Chemical Match?",
+    "Duplicate Feature?",
+    "Is Adduct or Loss?",
+    "Has Adduct or Loss?",
+    "Adduct or Loss Info",
+    "Final Occurrence Count",
+    "Final Occurrence Percentage",
+    "Final Occurrence Count (with flags)",
+    "Final Occurrence Percentage (with flags)",
+]
diff --git a/app/feature/tests/test_feature.py b/app/feature/tests/test_feature.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 import unittest
-from Feature import Feature_MS2 as ms2
+from feature import Feature_MS2 as ms2
 from test_data import parsedMGF as mgfData
 
 #Note about test mgfData

diff --git a/app/ms1/__init__.py b/app/ms1/__init__.py
diff --git a/app/ms1/nta_task.py b/app/ms1/nta_task.py
@@ -6,6 +6,7 @@
 import traceback
 import shutil
 import json
+from typing import Union
 from datetime import datetime
 from dask.distributed import Client, LocalCluster, fire_and_forget
 from zipfile import ZipFile, ZIP_DEFLATED
@@ -35,13 +36,13 @@
 
 def run_nta_dask(
     parameters,
-    input_dfs,
-    tracer_df=None,
-    run_sequence_pos_df=None,
-    run_sequence_neg_df=None,
-    qnta_df=None,
-    jobid="00000000",
-    verbose=True,
+    input_dfs: list[Union[pd.DataFrame, None]],
+    tracer_df: Union[pd.DataFrame, None] = None,
+    run_sequence_pos_df: Union[pd.DataFrame, None] = None,
+    run_sequence_neg_df: Union[pd.DataFrame, None] = None,
+    qnta_df: Union[pd.DataFrame, None] = None,
+    jobid = "00000000",
+    verbose = True,
 ):
     in_docker = os.environ.get("IN_DOCKER") != "False"
     mongo_address = os.environ.get("MONGO_SERVER")
@@ -96,14 +97,14 @@ def run_nta_dask(
 
 def run_nta(
     parameters,
-    input_dfs,
-    tracer_df=None,
-    run_sequence_pos_df=None,
-    run_sequence_neg_df=None,
-    qnta_df=None,
-    mongo_address=None,
-    jobid="00000000",
-    verbose=True,
+    input_dfs: list[Union[pd.DataFrame, None]],
+    tracer_df: Union[pd.DataFrame, None] = None,
+    run_sequence_pos_df: Union[pd.DataFrame, None] = None,
+    run_sequence_neg_df: Union[pd.DataFrame, None] = None,
+    qnta_df: Union[pd.DataFrame, None] = None,
+    jobid = "00000000",
+    verbose = True,
+    mongo_address: Union[str, None] = None,
     in_docker=True,
 ):
     nta_run = NtaRun(
@@ -140,7 +141,7 @@ def __init__(
         run_sequence_pos_df=None,
         run_sequence_neg_df=None,
         qnta_df=None,
-        mongo_address=None,
+        mongo_address: Union[str, None] = None,
         jobid="00000000",
         verbose=True,
         in_docker=True,
@@ -406,7 +407,7 @@ def check_existence_of_mass_column(self, input_dfs):
 
         return
 
-    def check_retention_time_column(self, input_dfs):
+    def check_retention_time_column(self, input_dfs: list[Union[pd.DataFrame, None]]):
         """
         Check for the existence of alternate spellings of 'Retention_Time' column in input dataframes and rename to "Retention_Time".
 
@@ -612,7 +613,7 @@ def pass_through_cols(self):
         self.dfs = [task_fun.passthrucol(df, self.all_headers)[1] if df is not None else None for df in self.dfs]
         return
 
-    def filter_void_volume(self, min_rt):
+    def filter_void_volume(self, min_rt: float):
         """
         Accesses self.dfs (list of dataframes) and self.parameters["minimum_rt"][1]
         then removes all rows with a value below "minimum_rt" in the "Retention_Time"