NINAnor · frafra · Aug 6, 2025 · Aug 7, 2025
diff --git a/Dockerfile b/Dockerfile
@@ -1,18 +1,15 @@
-FROM python:3.10
+FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS base
+
+FROM base AS model-downloader
 
-# Pre-download Whisper model using ADD
-ARG WHISPER_MODEL_URL=https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt
 ARG WHISPER_MODEL=medium
-RUN mkdir -p /root/.cache/whisper
-ADD ${WHISPER_MODEL_URL} /root/.cache/whisper/${WHISPER_MODEL}.pt
+RUN uv pip install --system faster-whisper
+RUN python -c "from faster_whisper import WhisperModel; model = WhisperModel('${WHISPER_MODEL}', device='cpu')"
 
-# Remove apt auto-clean hook to preserve cache
-RUN rm -f /etc/apt/apt.conf.d/docker-clean
+FROM base
 
-# Install system dependencies with cache mount
-RUN --mount=type=cache,target=/var/cache/apt \
-    --mount=type=cache,target=/var/lib/apt/lists \
-    apt update && apt install -y --no-install-recommends ffmpeg
+# Copy model cache from first stage
+COPY --from=model-downloader /root/.cache /root/.cache
 
 # Copy uv binary
 COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv

diff --git a/app.py b/app.py
@@ -5,25 +5,87 @@
 
 import dash
 import dash_bootstrap_components as dbc
-import torch
-import whisper
 from dash import Input, Output, State, dcc, html
+from faster_whisper import WhisperModel
 
 UPLOAD_FOLDER = tempfile.mkdtemp(dir=os.getcwd())
 
 # Define instructions and error messages
 instr = [
     [html.B("1. UPLOAD .MP3: "), "Click on the Drag and Drop box below"],
+    [html.B("2. SELECT LANGUAGE: "), "Choose the source language of your audio file"],
     [
-        html.B("2. ANALYZE: "),
+        html.B("3. ANALYZE: "),
         "Click 'Analyze'. You should see a spinner indicating that the file is being translated.",
     ],
     [
-        html.B("3. DOWNLOAD: "),
+        html.B("4. DOWNLOAD: "),
         "Once the analysis is finished, click on the button 'Download transcription' or 'Download translation' to get the results as a .srt file.",
     ],
 ]
 
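+# Common languages supported by Whisper. NOTE(review): Dash documents option values as str/number/bool - confirm the None "Auto-detect" entry is selectable.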
+SUPPORTED_LANGUAGES = [
+    {"label": "Auto-detect", "value": None},
+    {"label": "Afrikaans", "value": "af"},
+    {"label": "Arabic", "value": "ar"},
+    {"label": "Armenian", "value": "hy"},
+    {"label": "Azerbaijani", "value": "az"},
+    {"label": "Belarusian", "value": "be"},
+    {"label": "Bosnian", "value": "bs"},
+    {"label": "Bulgarian", "value": "bg"},
+    {"label": "Catalan", "value": "ca"},
+    {"label": "Chinese", "value": "zh"},
+    {"label": "Croatian", "value": "hr"},
+    {"label": "Czech", "value": "cs"},
+    {"label": "Danish", "value": "da"},
+    {"label": "Dutch", "value": "nl"},
+    {"label": "English", "value": "en"},
+    {"label": "Estonian", "value": "et"},
+    {"label": "Finnish", "value": "fi"},
+    {"label": "French", "value": "fr"},
+    {"label": "Galician", "value": "gl"},
+    {"label": "German", "value": "de"},
+    {"label": "Greek", "value": "el"},
+    {"label": "Hebrew", "value": "he"},
+    {"label": "Hindi", "value": "hi"},
+    {"label": "Hungarian", "value": "hu"},
+    {"label": "Icelandic", "value": "is"},
+    {"label": "Indonesian", "value": "id"},
+    {"label": "Italian", "value": "it"},
+    {"label": "Japanese", "value": "ja"},
+    {"label": "Kannada", "value": "kn"},
+    {"label": "Kazakh", "value": "kk"},
+    {"label": "Korean", "value": "ko"},
+    {"label": "Latvian", "value": "lv"},
+    {"label": "Lithuanian", "value": "lt"},
+    {"label": "Macedonian", "value": "mk"},
+    {"label": "Malay", "value": "ms"},
+    {"label": "Marathi", "value": "mr"},
+    {"label": "Maori", "value": "mi"},
+    {"label": "Nepali", "value": "ne"},
+    {"label": "Norwegian", "value": "no"},
+    {"label": "Persian", "value": "fa"},
+    {"label": "Polish", "value": "pl"},
+    {"label": "Portuguese", "value": "pt"},
+    {"label": "Romanian", "value": "ro"},
+    {"label": "Russian", "value": "ru"},
+    {"label": "Serbian", "value": "sr"},
+    {"label": "Slovak", "value": "sk"},
+    {"label": "Slovenian", "value": "sl"},
+    {"label": "Spanish", "value": "es"},
+    {"label": "Swahili", "value": "sw"},
+    {"label": "Swedish", "value": "sv"},
+    {"label": "Tagalog", "value": "tl"},
+    {"label": "Tamil", "value": "ta"},
+    {"label": "Thai", "value": "th"},
+    {"label": "Turkish", "value": "tr"},
+    {"label": "Ukrainian", "value": "uk"},
+    {"label": "Urdu", "value": "ur"},
+    {"label": "Vietnamese", "value": "vi"},
+    {"label": "Welsh", "value": "cy"},
+]
+
 
 # Save the uploaded file
 def save_uploaded_file(contents, filename, folder=UPLOAD_FOLDER):
@@ -64,13 +126,33 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
         )
 
 
-def translate_transcribe_file(file_path):
-    try:
-        model = whisper.load_model("medium")
-    except torch.OutOfMemoryError:  # fallback
-        model = whisper.load_model("tiny", device="cpu")
-    translation = model.transcribe(file_path, language="no", task="translate")
-    transcription = model.transcribe(file_path, language="no")
+def translate_transcribe_file(file_path, language=None):
+    """Translate and transcribe ``file_path`` with the "medium" Whisper model.
+
+    ``language`` is a source-language code such as "no"; a falsy value
+    (``None`` or "") requests auto-detection.
+    Each pass is materialised as a ``{"segments": [...]}`` dict whose items
+    have ``start``, ``end`` and ``text`` keys.
+    """
+    # NOTE(review): the model is re-loaded on every call; consider caching it.
+    model = WhisperModel("medium")
+    # Normalise falsy selections so a single code path serves both the
+    # explicit-language and the auto-detect case.
+    language = language or None
+
+    def run(task):
+        """Run one pass and convert its segments to the write_srt format."""
+        segments, _ = model.transcribe(file_path, language=language, task=task)
+        return {
+            "segments": [
+                {"start": seg.start, "end": seg.end, "text": seg.text}
+                for seg in segments
+            ]
+        }
+
+    # Translation pass first, then the plain transcription pass.
+    translation = run("translate")
+    transcription = run("transcribe")
 
     # Create temporary files for translation and transcription
     with tempfile.NamedTemporaryFile(
@@ -123,6 +205,16 @@ def translate_transcribe_file(file_path):
             },
             multiple=False,
         ),
+        html.Div([
+            html.Label("Select Language:", className="form-label"),
+            dcc.Dropdown(
+                id="language-dropdown",
+                options=SUPPORTED_LANGUAGES,
+                value="no",  # Default to Norwegian as before
+                placeholder="Choose source language",
+                className="mb-3",
+            ),
+        ], className="mb-3"),
         dbc.Button(
             "Analyze",
             id="analyze-button",
@@ -194,15 +286,15 @@ def update_upload_box_style(contents):
         Output("results-output", "data-transcription-path"),
     ],
     [Input("analyze-button", "n_clicks")],
-    [State("folder-upload", "contents"), State("folder-upload", "filename")],
+    [State("folder-upload", "contents"), State("folder-upload", "filename"), State("language-dropdown", "value")],
 )
-def analyze_file(n_clicks, content, filename):
+def analyze_file(n_clicks, content, filename, selected_language):
     if n_clicks > 0:
         if not filename or not content:
             return "", dbc.Alert("No file uploaded!", color="danger"), "", ""
         file_path = save_uploaded_file(content, filename)
         if os.path.exists(file_path):
-            translation_path, transcription_path = translate_transcribe_file(file_path)
+            translation_path, transcription_path = translate_transcribe_file(file_path, selected_language)
             return (
                 html.Div("File has been analyzed successfully!"),
                 "",

diff --git a/pyproject.toml b/pyproject.toml
@@ -10,5 +10,5 @@ requires-python = ">=3.8"
 dependencies = [
     "dash==2.14.0",
     "dash-bootstrap-components>=1.5.0",
-    "openai-whisper>=20250625",
+    "faster-whisper>=1.1.0",
 ]
diff --git a/translate.py b/translate.py
@@ -1,12 +1,19 @@
-import whisper
+from faster_whisper import WhisperModel
 import argparse
 
 
-def translate(file):
-    model = whisper.load_model("medium")
-    results = model.transcribe(file, language="no", task="translate")
+def translate(file, language=None):
+    """Translate ``file`` and write the text to translation.txt.
+
+    A falsy ``language`` lets the model auto-detect the source language.
+    """
+    model = WhisperModel("medium")
+    segments, _ = model.transcribe(file, language=language or None, task="translate")
+    # Segment texts normally start with their own space; join without a separator.
+    full_text = "".join(seg.text for seg in segments)
+
     with open("translation.txt", "w", encoding="utf-8") as txt:
-        txt.write(results["text"])
+        txt.write(full_text)
 
 
 if __name__ == "__main__":
@@ -18,7 +25,15 @@ def translate(file):
         required=True,
         type=str,
     )
+
+    parser.add_argument(
+        "--language",
+        help="Source language code (e.g., 'no', 'en', 'fr'). If not specified, auto-detect will be used.",
+        required=False,
+        type=str,
+        default=None,
+    )
 
     cli_args = parser.parse_args()
 
-    translate(cli_args.input)
+    translate(cli_args.input, cli_args.language)
diff --git a/utils/get_model_url.py b/utils/get_model_url.py