Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 8 additions & 11 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,18 +1,15 @@
FROM python:3.10
FROM ghcr.io/astral-sh/uv:python3.12-bookworm-slim AS base

FROM base AS model-downloader

# Pre-download Whisper model using ADD
ARG WHISPER_MODEL_URL=https://openaipublic.azureedge.net/main/whisper/models/345ae4da62f9b3d59415adc60127b97c714f32e89e936602e85993674d08dcb1/medium.pt
ARG WHISPER_MODEL=medium
RUN mkdir -p /root/.cache/whisper
ADD ${WHISPER_MODEL_URL} /root/.cache/whisper/${WHISPER_MODEL}.pt
RUN uv pip install --system faster-whisper
RUN python -c "from faster_whisper import WhisperModel; model = WhisperModel('${WHISPER_MODEL}', device='cpu')"

# Remove apt auto-clean hook to preserve cache
RUN rm -f /etc/apt/apt.conf.d/docker-clean
FROM base

# Install system dependencies with cache mount
RUN --mount=type=cache,target=/var/cache/apt \
--mount=type=cache,target=/var/lib/apt/lists \
apt update && apt install -y --no-install-recommends ffmpeg
# Copy model cache from first stage
COPY --from=model-downloader /root/.cache /root/.cache

# Copy uv binary
COPY --from=ghcr.io/astral-sh/uv:latest /uv /bin/uv
Expand Down
120 changes: 106 additions & 14 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,25 +5,87 @@

import dash
import dash_bootstrap_components as dbc
import torch
import whisper
from dash import Input, Output, State, dcc, html
from faster_whisper import WhisperModel

UPLOAD_FOLDER = tempfile.mkdtemp(dir=os.getcwd())

# Usage instructions: each entry pairs a bold, numbered step label with the
# text describing that step. Every step carries exactly one label and the
# numbering runs 1-4 without repeats.
instr = [
    [html.B("1. UPLOAD .MP3: "), "Click on the Drag and Drop box below"],
    [html.B("2. SELECT LANGUAGE: "), "Choose the source language of your audio file"],
    [
        html.B("3. ANALYZE: "),
        "Click 'Analyze'. You should see a spinner indicating that the file is being translated.",
    ],
    [
        html.B("4. DOWNLOAD: "),
        "Once the analysis is finished, click on the button 'Download transcription' or 'Download translation' to get the results as a .srt file.",
    ],
]

# Source-language choices for Whisper, as dropdown-style option dicts with a
# human-readable "label" and a language-code "value". The leading
# "Auto-detect" entry uses None as its value.
# NOTE(review): confirm that the dropdown component accepts None as an option
# value; some UI frameworks only allow string/number/bool there.
SUPPORTED_LANGUAGES = [
    {"label": label, "value": code}
    for label, code in (
        ("Auto-detect", None),
        ("Afrikaans", "af"),
        ("Arabic", "ar"),
        ("Armenian", "hy"),
        ("Azerbaijani", "az"),
        ("Belarusian", "be"),
        ("Bosnian", "bs"),
        ("Bulgarian", "bg"),
        ("Catalan", "ca"),
        ("Chinese", "zh"),
        ("Croatian", "hr"),
        ("Czech", "cs"),
        ("Danish", "da"),
        ("Dutch", "nl"),
        ("English", "en"),
        ("Estonian", "et"),
        ("Finnish", "fi"),
        ("French", "fr"),
        ("Galician", "gl"),
        ("German", "de"),
        ("Greek", "el"),
        ("Hebrew", "he"),
        ("Hindi", "hi"),
        ("Hungarian", "hu"),
        ("Icelandic", "is"),
        ("Indonesian", "id"),
        ("Italian", "it"),
        ("Japanese", "ja"),
        ("Kannada", "kn"),
        ("Kazakh", "kk"),
        ("Korean", "ko"),
        ("Latvian", "lv"),
        ("Lithuanian", "lt"),
        ("Macedonian", "mk"),
        ("Malay", "ms"),
        ("Marathi", "mr"),
        ("Maori", "mi"),
        ("Nepali", "ne"),
        ("Norwegian", "no"),
        ("Persian", "fa"),
        ("Polish", "pl"),
        ("Portuguese", "pt"),
        ("Romanian", "ro"),
        ("Russian", "ru"),
        ("Serbian", "sr"),
        ("Slovak", "sk"),
        ("Slovenian", "sl"),
        ("Spanish", "es"),
        ("Swahili", "sw"),
        ("Swedish", "sv"),
        ("Tagalog", "tl"),
        ("Tamil", "ta"),
        ("Thai", "th"),
        ("Turkish", "tr"),
        ("Ukrainian", "uk"),
        ("Urdu", "ur"),
        ("Vietnamese", "vi"),
        ("Welsh", "cy"),
    )
]


# Save the uploaded file
def save_uploaded_file(contents, filename, folder=UPLOAD_FOLDER):
Expand Down Expand Up @@ -64,13 +126,33 @@ def write_srt(transcript: Iterator[dict], file: TextIO):
)


def translate_transcribe_file(file_path):
try:
model = whisper.load_model("medium")
except torch.OutOfMemoryError: # fallback
model = whisper.load_model("tiny", device="cpu")
translation = model.transcribe(file_path, language="no", task="translate")
transcription = model.transcribe(file_path, language="no")
def translate_transcribe_file(file_path, language=None):
model = WhisperModel("medium")

# Get translation segments
if language:
translation_segments, _ = model.transcribe(
file_path, language=language, task="translate"
)
transcription_segments, _ = model.transcribe(file_path, language=language)
else:
translation_segments, _ = model.transcribe(file_path, task="translate")
transcription_segments, _ = model.transcribe(file_path)

# Convert faster_whisper segments to format expected by write_srt
translation = {
"segments": [
{"start": seg.start, "end": seg.end, "text": seg.text}
for seg in translation_segments
]
}

transcription = {
"segments": [
{"start": seg.start, "end": seg.end, "text": seg.text}
for seg in transcription_segments
]
}

# Create temporary files for translation and transcription
with tempfile.NamedTemporaryFile(
Expand Down Expand Up @@ -123,6 +205,16 @@ def translate_transcribe_file(file_path):
},
multiple=False,
),
html.Div([
html.Label("Select Language:", className="form-label"),
dcc.Dropdown(
id="language-dropdown",
options=SUPPORTED_LANGUAGES,
value="no", # Default to Norwegian as before
placeholder="Choose source language",
className="mb-3",
),
], className="mb-3"),
dbc.Button(
"Analyze",
id="analyze-button",
Expand Down Expand Up @@ -194,15 +286,15 @@ def update_upload_box_style(contents):
Output("results-output", "data-transcription-path"),
],
[Input("analyze-button", "n_clicks")],
[State("folder-upload", "contents"), State("folder-upload", "filename")],
[State("folder-upload", "contents"), State("folder-upload", "filename"), State("language-dropdown", "value")],
)
def analyze_file(n_clicks, content, filename):
def analyze_file(n_clicks, content, filename, selected_language):
if n_clicks > 0:
if not filename or not content:
return "", dbc.Alert("No file uploaded!", color="danger"), "", ""
file_path = save_uploaded_file(content, filename)
if os.path.exists(file_path):
translation_path, transcription_path = translate_transcribe_file(file_path)
translation_path, transcription_path = translate_transcribe_file(file_path, selected_language)
return (
html.Div("File has been analyzed successfully!"),
"",
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,5 @@ requires-python = ">=3.8"
dependencies = [
"dash==2.14.0",
"dash-bootstrap-components>=1.5.0",
"openai-whisper>=20250625",
"faster-whisper>=1.1.0",
]
27 changes: 21 additions & 6 deletions translate.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,19 @@
import whisper
from faster_whisper import WhisperModel
import argparse


def translate(file, language=None):
    """Run the model's "translate" task on an audio file and save the text.

    Args:
        file: Path of the audio file passed to ``WhisperModel.transcribe``.
        language: Source language code (e.g. ``"no"``, ``"en"``, ``"fr"``).
            ``None`` or an empty string leaves language detection to the
            model.

    The translated text is written to ``translation.txt`` in the current
    working directory, replacing any existing file of that name.
    """
    model = WhisperModel("medium")

    # A falsy language is normalised to None so a single call covers both the
    # explicit-language and the auto-detect case.
    segments, _ = model.transcribe(file, language=language or None, task="translate")

    # Segment texts can carry their own surrounding whitespace; strip each one
    # so joining yields exactly one space between segments.
    full_text = " ".join(seg.text.strip() for seg in segments)

    with open("translation.txt", "w", encoding="utf-8") as txt:
        txt.write(full_text)


if __name__ == "__main__":
Expand All @@ -18,7 +25,15 @@ def translate(file):
required=True,
type=str,
)

parser.add_argument(
"--language",
help="Source language code (e.g., 'no', 'en', 'fr'). If not specified, auto-detect will be used.",
required=False,
type=str,
default=None,
)

cli_args = parser.parse_args()

translate(cli_args.input)
translate(cli_args.input, cli_args.language)
21 changes: 0 additions & 21 deletions utils/get_model_url.py

This file was deleted.