Skip to content

Commit 50b83aa

Browse files
zubeydecivelek authored and zzacharo committed
subtitles: support srt files and convert to vtt
1 parent 6bd4523 commit 50b83aa

File tree

4 files changed

+83
-16
lines changed

4 files changed

+83
-16
lines changed

cds/modules/deposit/ext.py

Lines changed: 64 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,18 @@
2626

2727
import re
2828
import mimetypes
29+
import tempfile
30+
import os
31+
import shutil
2932

3033
from invenio_base.signals import app_loaded
3134
from invenio_db import db
32-
from invenio_files_rest.models import ObjectVersionTag
35+
from invenio_files_rest.models import ObjectVersion, ObjectVersionTag
3336
from invenio_files_rest.signals import file_uploaded
3437
from invenio_files_rest.errors import InvalidKeyError
3538
from invenio_indexer.signals import before_record_index
3639
from invenio_records_files.utils import sorted_files_from_bucket
40+
from srt_to_vtt import srt_to_vtt
3741

3842
from ..invenio_deposit.signals import post_action
3943
from .indexer import cdsdeposit_indexer_receiver
@@ -45,16 +49,66 @@
4549
)
4650

4751

52+
def _create_vtt_from_srt(srt_obj):
    """Create a VTT file from an SRT file.

    The VTT object is stored in the same bucket as the SRT object, under the
    same key with the extension replaced by ``.vtt``. Conversion is
    best-effort: any failure is logged and reported as ``None`` instead of
    being raised.

    :param srt_obj: ObjectVersion of the SRT file
    :returns: ObjectVersion of the VTT file (the existing one if a VTT with
        the derived key is already in the bucket, otherwise the newly created
        one), or None if the SRT has no file/URI or the conversion failed
    """
    # Local import so this block does not depend on a module-level import.
    import logging

    # Generate VTT filename from SRT filename
    vtt_key = srt_obj.key.rsplit(".", 1)[0] + ".vtt"

    # Check if VTT file already exists; if so, reuse it instead of converting
    existing_vtt = ObjectVersion.get(srt_obj.bucket_id, vtt_key)
    if existing_vtt:
        return existing_vtt

    # Ensure the SRT file has a file instance
    if not srt_obj.file or not srt_obj.file.uri:
        return None

    # NOTE(review): the URI is handed to the converter as a filesystem path;
    # this presumably only works for locally accessible storage -- confirm.
    srt_path = srt_obj.file.uri
    tmp_dir = None
    try:
        # Create temporary directory for VTT file
        tmp_dir = tempfile.mkdtemp()
        # Use a fixed file name rather than the object key: a key may contain
        # path separators, which would point into a non-existent
        # sub-directory or (with a leading "/") outside the temp directory.
        vtt_path = os.path.join(tmp_dir, "converted.vtt")

        # Convert using srt-to-vtt library
        srt_to_vtt(srt_path, vtt_path)

        # Create VTT ObjectVersion; the context manager guarantees the
        # stream is closed even if the creation fails.
        with open(vtt_path, "rb") as vtt_stream:
            vtt_obj = ObjectVersion.create(
                bucket=srt_obj.bucket,
                key=vtt_key,
                stream=vtt_stream,
                size=os.path.getsize(vtt_path),
            )
        _create_tags(vtt_obj)
        return vtt_obj
    except Exception:
        # Best-effort: never let a failed conversion propagate, but leave a
        # trace so that failures can be diagnosed.
        logging.getLogger(__name__).exception(
            "Could not create VTT from SRT file %s", srt_obj.key
        )
        return None
    finally:
        # Clean up temporary directory, ignoring removal errors
        if tmp_dir:
            shutil.rmtree(tmp_dir, ignore_errors=True)
100+
48101
def _create_tags(obj):
49102
"""Create additional tags for file."""
50103
pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$")
51104
pattern_poster = re.compile(r"^poster\.(jpg|png)$")
52-
105+
53106
# Get the media_type and content_type(file ext)
54107
file_name = obj.key
55108
mimetypes.add_type("subtitle/vtt", ".vtt")
109+
mimetypes.add_type("text/srt", ".srt")
56110
guessed_type = mimetypes.guess_type(file_name)[0]
57-
if guessed_type is None:
111+
if guessed_type is None:
58112
raise InvalidKeyError(description=f"Unsupported File: {file_name}")
59113

60114
media_type = guessed_type.split("/")[0]
@@ -73,7 +127,13 @@ def _create_tags(obj):
73127
# other tags
74128
ObjectVersionTag.create_or_update(obj, "content_type", "vtt")
75129
ObjectVersionTag.create_or_update(obj, "context_type", "subtitle")
76-
# poster tag
130+
elif file_ext == "srt":
131+
# Create VTT version from SRT
132+
try:
133+
_create_vtt_from_srt(obj)
134+
except Exception:
135+
pass
136+
# poster tag
77137
elif pattern_poster.match(file_name):
78138
ObjectVersionTag.create_or_update(obj, "context_type", "poster")
79139

cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,8 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
148148
ngf-model-options="{allowInvalid: false}"
149149
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
150150
ngf-select=""
151-
ngf-pattern="'.vtt'"
152-
ngf-accept="'.vtt'"
151+
ngf-pattern="'.vtt,.srt'"
152+
ngf-accept="'.vtt,.srt'"
153153
ngf-validate-fn="$ctrl.validateSubtitles($file)"
154154
ngf-max-size="500GB"
155155
><i class="fa fa-plus-square"></i></a>
@@ -167,8 +167,8 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
167167
ng-if="!$ctrl.cdsDepositCtrl.isPublished()"
168168
ngf-select=""
169169
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
170-
ngf-pattern="'.vtt'"
171-
ngf-accept="'text/vtt'"
170+
ngf-pattern="'.vtt,.srt'"
171+
ngf-accept="'text/vtt,.vtt,.srt'"
172172
ngf-validate-fn="$ctrl.validateSubtitles($file)"
173173
ngf-max-size="500GB"
174174
ngf-multiple="true"
@@ -183,15 +183,15 @@ <h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
183183
ngf-model-options="{allowInvalid: false}"
184184
ngf-change="$ctrl.addFiles($newFiles, $invalidFiles)"
185185
ngf-select=""
186-
ngf-pattern="'.vtt'"
187-
ngf-accept="'.vtt'"
186+
ngf-pattern="'.vtt,.srt'"
187+
ngf-accept="'.vtt,.srt'"
188188
ngf-validate-fn="$ctrl.validateSubtitles($file)"
189-
ngf-max-size="500GB">select</a> <mark>.vtt</mark> files.
189+
ngf-max-size="500GB">select</a> <mark>.vtt</mark> or <mark>.srt</mark> files.
190190
<hr class="my-10" />
191191
<div class="text-muted text-left">
192192
<h5 class="text-muted"><strong>Tips and suggestions</strong></h5>
193193
<ul>
194-
<li>Subtitle filename should have a valid ISO language code. Example: <mark>subtitles_fr.vtt</mark> </li>
194+
<li>Subtitle filename should have a valid ISO language code. Example: <mark>subtitles_fr.vtt</mark> or <mark>subtitles_fr.srt</mark> </li>
195195
</ul>
196196
</div>
197197
</p>

cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -284,7 +284,12 @@ function cdsUploaderCtrl(
284284

285285
// Filter out files without a valid MIME type or with zero size
286286
_files = _files.filter((file) => {
287-
if (!file.type || file.type.trim() === "") {
287+
// Allow SRT and VTT files even if they don't have a MIME type
288+
var fileName = file.name.toLowerCase();
289+
var isSubtitleFile =
290+
fileName.endsWith(".vtt") || fileName.endsWith(".srt");
291+
292+
if ((!file.type || file.type.trim() === "") && !isSubtitleFile) {
288293
toaster.pop(
289294
"warning",
290295
"Invalid File Type",
@@ -544,13 +549,14 @@ function cdsUploaderCtrl(
544549
this.validateSubtitles = function (_file) {
545550
// Check if the filename matches the pattern and is a valid ISO language
546551
// i.e. jessica_jones-en.vtt
547-
var match = _file.name.match(/(?:.+)[_|-]([a-zA-Z]{2}).vtt/) || [];
552+
var match = _file.name.match(/(?:.+)[_|-]([a-zA-Z]{2})\.(vtt|srt)/) || [];
548553
return match.length > 1 && match[1] in isoLanguages;
549554
};
550555

551556
this.validateAdditionalFiles = function (_file) {
552-
// If it's a .vtt file, validate as subtitle
553-
if (_file.name.toLowerCase().endsWith(".vtt")) {
557+
// If it's a .vtt or .srt file, validate as subtitle
558+
var fileName = _file.name.toLowerCase();
559+
if (fileName.endsWith(".vtt") || fileName.endsWith(".srt")) {
554560
return this.validateSubtitles(_file);
555561
}
556562
// Accept other types

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,7 @@ simplekv==0.14.1
184184
six==1.17.0
185185
soupsieve==2.6
186186
speaklater==1.3
187+
srt-to-vtt==1.0.0
187188
SQLAlchemy==1.4.54
188189
SQLAlchemy-Continuum==1.4.1
189190
SQLAlchemy-Utils==0.38.3

0 commit comments

Comments
 (0)