From f6a6c59695814344beae1b8c9e1b4cc3f6f16bd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Wed, 27 Nov 2024 13:33:22 +0100 Subject: [PATCH 01/41] files: support videos as additional files --- ...Collection - CDS Videos Publish Video.json | 19 +-- README.rst | 21 ++-- cds/modules/deposit/api.py | 10 +- cds/modules/deposit/ext.py | 61 +++++----- .../templates/cds_deposit/deposits.html | 1 + .../cds_deposit/types/video/uploader.html | 93 ++++++++++----- .../cds_records/video/downloads.html | 43 ++++++- .../theme/assets/bootstrap3/js/cds/module.js | 22 +++- .../cds_deposit/avc/components/cdsUploader.js | 112 ++++++++++++++---- .../theme/assets/bootstrap3/scss/cds/cds.scss | 4 + tests/unit/test_video_rest.py | 64 ++++++++++ 11 files changed, 327 insertions(+), 123 deletions(-) diff --git a/Bruno Collection - CDS Videos Publish Video.json b/Bruno Collection - CDS Videos Publish Video.json index fb6898f95..dcd1fb7ec 100644 --- a/Bruno Collection - CDS Videos Publish Video.json +++ b/Bruno Collection - CDS Videos Publish Video.json @@ -79,7 +79,7 @@ "params": [], "body": { "mode": "json", - "json": "{\n \"$schema\": \"https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json\",\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n \"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}", + "json": "{\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n \"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}", "formUrlEncoded": [], "multipartForm": [] }, @@ -111,7 +111,7 @@ "params": [], "body": { "mode": "json", - "json": "{\n 
\"$schema\":\"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json\",\n \"_project_id\":\"{{project_id}}\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}", + "json": "{\n \"_project_id\":\"{{project_id}}\",\n \"language\":\"en\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}", "formUrlEncoded": [], "multipartForm": [] }, @@ -200,7 +200,13 @@ "request": { "url": "{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}", "method": "PUT", - "headers": [], + "headers": [ + { + "name": "X-Invenio-File-Tags", + "value": "context_type=additional_file", + "enabled": true + } + ], "params": [], "body": { "mode": "json", @@ 
-284,9 +290,6 @@ "allow": true } }, - "ignore": [ - "node_modules", - ".git" - ] + "ignore": ["node_modules", ".git"] } -} \ No newline at end of file +} diff --git a/README.rst b/README.rst index b27d6d768..376788460 100644 --- a/README.rst +++ b/README.rst @@ -258,11 +258,6 @@ Step 1: Create a Project - **Location** - **Description** - **Required/Optional** - * - **$schema** - - string - - body - - Schema URL for the project creation. - - Required * - **category** - string - body @@ -307,7 +302,6 @@ To restrict the project, add ``_access/read``: .. code-block:: json { - "$schema": "https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json", "_access": { "update": [ "admin@test.ch", @@ -379,11 +373,6 @@ Step 2: Create a Video - **Location** - **Description** - **Required/Optional** - * - **$schema** - - string - - body - - Schema URL for video creation. - - Required * - **_project_id** - string - body @@ -423,7 +412,7 @@ Step 2: Create a Video - string - body - Language of the video. - - Optional + - Required * - **featured** - boolean - body @@ -447,7 +436,6 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the .. code-block:: json { - "$schema":"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json", "_project_id":"{{project_id}}", "title": { @@ -495,7 +483,8 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the "name": "related link", "url": "https://relatedlink" } - ] + ], + "language": "en" } **Response:** @@ -605,6 +594,10 @@ Step 5: (Optional) Upload Additional File ``PUT`` ``{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}`` +**Headers:** + +- ``X-Invenio-File-Tags: context_type=additional_file`` + **Parameters:** .. 
list-table:: diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py index 4c19ec5c3..90504749f 100644 --- a/cds/modules/deposit/api.py +++ b/cds/modules/deposit/api.py @@ -862,15 +862,7 @@ def _rename_subtitles(self): subtitle_obj_key = "{}_{}.vtt".format( self["report_number"][0], match.group("iso_lang") ) - obj = ObjectVersion.create( - bucket=subtitle_obj.bucket, - key=subtitle_obj_key, - _file_id=subtitle_obj.file_id, - ) - # copy tags to the newly created object version - for tag in subtitle_obj.tags: - tag.object_version = obj - subtitle_obj.remove() + subtitle_obj.key = subtitle_obj_key def _rename_master_file(self, master_file): """Rename master file.""" diff --git a/cds/modules/deposit/ext.py b/cds/modules/deposit/ext.py index 0675488b1..f4a9d6466 100644 --- a/cds/modules/deposit/ext.py +++ b/cds/modules/deposit/ext.py @@ -25,11 +25,13 @@ """CDSDeposit app for Webhook receivers.""" import re +import mimetypes from invenio_base.signals import app_loaded from invenio_db import db from invenio_files_rest.models import ObjectVersionTag from invenio_files_rest.signals import file_uploaded +from invenio_files_rest.errors import InvalidKeyError from invenio_indexer.signals import before_record_index from invenio_records_files.utils import sorted_files_from_bucket @@ -45,38 +47,37 @@ def _create_tags(obj): """Create additional tags for file.""" - # Subtitle file - pattern = re.compile(".*_([a-zA-Z]{2})\.vtt$") + pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$") + pattern_poster = re.compile(r"^poster\.(jpg|png)$") + + # Get the media_type and content_type(file ext) + file_name = obj.key + mimetypes.add_type("subtitle/vtt", ".vtt") + guessed_type = mimetypes.guess_type(file_name)[0] + if guessed_type is None: + raise InvalidKeyError(description=f"Unsupported File: {file_name}") + + media_type = guessed_type.split("/")[0] + file_ext = guessed_type.split("/")[1] + with db.session.begin_nested(): - # language tag - found = pattern.findall(obj.key) 
- if len(found) == 1: - lang = found[0] - ObjectVersionTag.create_or_update(obj, "language", lang) - else: - # clean to be sure there is no some previous value - ObjectVersionTag.delete(obj, "language") - # other tags - ObjectVersionTag.create_or_update(obj, "content_type", "vtt") - ObjectVersionTag.create_or_update(obj, "context_type", "subtitle") - ObjectVersionTag.create_or_update(obj, "media_type", "subtitle") - # refresh object - db.session.add(obj) + ObjectVersionTag.create_or_update(obj, "content_type", file_ext) + ObjectVersionTag.create_or_update(obj, "media_type", media_type) + if file_ext == "vtt": + # language tag + match = pattern_subtitle.search(file_name) + if match: + ObjectVersionTag.create_or_update(obj, "language", match.group(1)) + else: + ObjectVersionTag.delete(obj, "language") + # other tags + ObjectVersionTag.create_or_update(obj, "content_type", "vtt") + ObjectVersionTag.create_or_update(obj, "context_type", "subtitle") + # poster tag + elif pattern_poster.match(file_name): + ObjectVersionTag.create_or_update(obj, "context_type", "poster") - # Poster frame - pattern = re.compile("^poster\.(jpg|png)$") - try: - poster = pattern.findall(obj.key) - if poster: - ext = pattern.findall(poster.key)[0] - # frame tags - ObjectVersionTag.create_or_update(poster, "content_type", ext) - ObjectVersionTag.create_or_update(poster, "context_type", "poster") - ObjectVersionTag.create_or_update(poster, "media_type", "image") - # refresh object - db.session.add(poster) - except IndexError: - return + db.session.add(obj) def create_tags_on_file_upload(sender, obj): diff --git a/cds/modules/deposit/static/templates/cds_deposit/deposits.html b/cds/modules/deposit/static/templates/cds_deposit/deposits.html index bb9c37b8a..1815ee009 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/deposits.html +++ b/cds/modules/deposit/static/templates/cds_deposit/deposits.html @@ -39,6 +39,7 @@
Tips

Click here to select videos to upload

You can also Drag & Drop video files here

+

supported files {{ $ctrl.videoExtensions }}

diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html index 872f8f518..24640444d 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html +++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html @@ -9,7 +9,7 @@
- Master & Subformats files + Main & Subformats files
@@ -224,12 +224,12 @@
Tips and suggestions
- -
+ +
- Other files + Additional files
-
+

@@ -242,7 +242,7 @@

Tips and suggestions
- +
@@ -271,6 +271,31 @@
Tips and suggestions
Filename Size
+ +
+
+
+
+

+ +

+
+

Upload complementary files for this video

+

Or Drag & Drop files

+
+
+
+
+
+

@@ -283,39 +308,43 @@
Tips and suggestions
-
-
-
-

- -

-
-
-

Upload complimentary files for this video

+ + +
+
+ Replace Video File +
+
+
+
+
+

+ +

+
+

To replace the video file, just upload a video here.

+

Or Drag & Drop files

+
-

Or Drag & Drop files

-
-
-
Tips and suggestions
-
    -
  • To replace the video file, just upload another video.
  • -
-
+ +
Tips and suggestions
  • Click the Edit button on the top right corner to add more files.
-
+
\ No newline at end of file diff --git a/cds/modules/records/static/templates/cds_records/video/downloads.html b/cds/modules/records/static/templates/cds_records/video/downloads.html index c5fc4888b..aa6e350a8 100644 --- a/cds/modules/records/static/templates/cds_records/video/downloads.html +++ b/cds/modules/records/static/templates/cds_records/video/downloads.html @@ -146,12 +146,12 @@

- -
+ +
- + + + + + + diff --git a/cds/modules/theme/assets/bootstrap3/js/cds/module.js b/cds/modules/theme/assets/bootstrap3/js/cds/module.js index 3d6493724..326b2a6f1 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds/module.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds/module.js @@ -341,6 +341,24 @@ app.filter("ellipsis", function () { }; }); +app.filter("middleEllipsis", function () { + return function (text, length) { + if (!text || text.length <= length) return text; + + const dotIndex = text.lastIndexOf("."); + const hasExtension = dotIndex > 0; + + if (hasExtension) { + const namePart = text.substring(0, dotIndex); + const extensionPart = text.substring(dotIndex); + + return namePart.substr(0, length) + " [...]" + extensionPart; + } + + return text.substr(0, length) + " [...]"; + }; +}); + // Trust as html app.filter("trustHtml", [ "$sce", @@ -403,7 +421,7 @@ app.filter("getFilesByType", function () { } return files.filter(function (file) { - return types.indexOf(file.context_type) !== -1; + return types.indexOf(file.media_type) !== -1; }); }; }); @@ -429,7 +447,7 @@ app.filter("getAllFilesExcept", function () { } return files.filter(function (file) { - return types.indexOf(file.context_type) == -1; + return types.indexOf(file.media_type) == -1; }); }; }); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js index dfadbf262..eaa4ae238 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js @@ -115,7 +115,10 @@ function cdsUploaderCtrl( if (!upload.key) { upload.key = upload.name; } - if (that.cdsDepositsCtrl.isVideoFile(upload.key)) { + if ( + !upload.isAdditional && + that.cdsDepositsCtrl.isVideoFile(upload.key) + ) { _subpromise = Upload.http(_startWorkflow(upload, response)); } else { var d = $q.defer(); 
@@ -278,25 +281,44 @@ function cdsUploaderCtrl( } // Remove any invalid files _files = _.difference(_files, invalidFiles || []); + + // Filter out files without a valid MIME type or with zero size + _files = _files.filter((file) => { + if (!file.type || file.type.trim() === "") { + toaster.pop( + "warning", + "Invalid File Type", + `The file ${file.name} has no valid type.` + ); + return false; // Exclude invalid files + } + + if (!file.size || file.size === 0) { + toaster.pop( + "warning", + "Empty File", + `The file ${file.name} is empty and cannot be uploaded.` + ); + return false; // Exclude zero-size files + } + + return true; + }); + // Make sure they have proper metadata angular.forEach(_files, function (file) { file.key = file.name; file.local = !file.receiver; + file.isAdditional = true; // Add any extra paramemters to the files if (extraHeaders) { file.headers = extraHeaders; } + file.headers = { + "X-Invenio-File-Tags": "context_type=additional_file", + }; }); - // Add the files to the list - var masterFile = that.cdsDepositCtrl.findMasterFile() || {}; - var videoFiles = _.values( - that.cdsDepositsCtrl.filterOutFiles(_files).videos - ); - - // Exclude video files - _files = _.difference(_files, videoFiles); - // Find if any of the existing files has been replaced // (file with same filename), and if yes remove it from the existing // file list (aka from the interface). 
@@ -323,6 +345,44 @@ function cdsUploaderCtrl( Array.prototype.push.apply(that.files, _files); // Add the files to the queue Array.prototype.push.apply(that.queue, _files); + + // Start upload automatically if the option is selected + if (that.autoStartUpload) { + that.upload(); + } + }; + + this.replaceMasterFile = function (_files, invalidFiles) { + // Do nothing if files array is empty + if (!_files) { + return; + } + // Remove any invalid files + _files = _.difference(_files, invalidFiles || []); + // Make sure they have proper metadata + angular.forEach(_files, function (file) { + file.key = file.name; + file.local = !file.receiver; + }); + + // Add the files to the list + var masterFile = that.cdsDepositCtrl.findMasterFile() || {}; + var videoFiles = _.values( + that.cdsDepositsCtrl.filterOutFiles(_files).videos + ); + + if ((invalidFiles || []).length > 0) { + // Push a notification + toaster.pop({ + type: "error", + title: + "Invalid file(s) for " + + (that.cdsDepositCtrl.record.title.title || "video."), + body: _.map(invalidFiles, "name").join(", "), + bodyOutputType: "trustedHtml", + }); + } + if (!that.cdsDepositCtrl.master) { // Check for new master file var newMasterFile = videoFiles[0]; @@ -358,11 +418,6 @@ function cdsUploaderCtrl( }); } } - - // Start upload automatically if the option is selected - if (that.autoStartUpload) { - that.upload(); - } }; // Prepare file request @@ -431,13 +486,26 @@ function cdsUploaderCtrl( function error(response) { // Inform the parents $scope.$emit("cds.deposit.error", response); - // Error uploading notification - toaster.pop({ - type: "error", - title: "Error uploading the file(s).", - body: (_.map(response, "config.data.key") || []).join(", "), - bodyOutputType: "trustedHtml", - }); + // Check if the response contains the error message + if ( + response.status === 400 && + response.data && + response.data.message + ) { + toaster.pop({ + type: "error", + title: response.data.message, + bodyOutputType: 
"trustedHtml", + }); + } else { + // Error uploading notification + toaster.pop({ + type: "error", + title: "Error uploading the file(s).", + body: (_.map(response, "config.data.key") || []).join(", "), + bodyOutputType: "trustedHtml", + }); + } } ) .finally(function done() { diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index a8ad576c9..f57f8f556 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -81,6 +81,10 @@ html, body { } } +.panel-heading-warning { + background-color: $brand-warning !important; +} + .cds-deposit-metadata-extraction-alert { line-height: 33px; span { diff --git a/tests/unit/test_video_rest.py b/tests/unit/test_video_rest.py index 0fa87973b..e052ec07c 100644 --- a/tests/unit/test_video_rest.py +++ b/tests/unit/test_video_rest.py @@ -27,6 +27,7 @@ import copy import json +from io import BytesIO from time import sleep import mock @@ -44,6 +45,7 @@ from invenio_db import db from invenio_indexer.api import RecordIndexer from invenio_search import current_search_client +from invenio_files_rest.models import ObjectVersion from cds.modules.deposit.api import deposit_project_resolver, deposit_video_resolver from cds.modules.deposit.receivers import datacite_register_after_publish @@ -588,6 +590,68 @@ def test_mint_doi_with_cli( doi, f"https://videos.cern.ch/record/{recid}" ) +def test_additional_files( + api_app, + users, + location, + json_headers, + json_partial_project_headers, + json_partial_video_headers, + deposit_metadata, + video_deposit_metadata, + project_deposit_metadata, +): + """Test video publish without DOI, then mint DOI using CLI.""" + api_app.config["DEPOSIT_DATACITE_MINTING_ENABLED"] = True + + with api_app.test_client() as client: + # Log in as the first user + login_user(User.query.get(users[0])) + + # Create a new project + project_dict = _create_new_project( + client, 
json_partial_project_headers, project_deposit_metadata + ) + + # Add a new empty video + video_dict = _add_video_info_to_project( + client, json_partial_video_headers, project_dict, video_deposit_metadata + ) + + video_depid = video_dict["metadata"]["_deposit"]["id"] + video_deposit = deposit_video_resolver(video_depid) + video_deposit_id = video_deposit["_deposit"]["id"] + bucket_id = video_deposit["_buckets"]["deposit"] + + # Upload additional file + key = "test.mp4" + headers = { + "X-Invenio-File-Tags": "context_type=additional_file" + } + resp = client.put( + url_for("invenio_files_rest.object_api", bucket_id=bucket_id, key=key), + input_stream=BytesIO(b"updated_content"), + headers=headers, + ) + assert resp.status_code == 200 + # Test it has the correct tags + tags = ObjectVersion.get(bucket_id, key).get_tags() + assert tags["context_type"] == "additional_file" + assert tags["content_type"] == "mp4" + assert tags["media_type"] == "video" + + # Upload invalid file and return 400 + key = "test" + headers = { + "X-Invenio-File-Tags": "context_type=additional_file" + } + resp = client.put( + url_for("invenio_files_rest.object_api", bucket_id=bucket_id, key=key), + input_stream=BytesIO(b"updated_content"), + headers=headers, + ) + assert resp.status_code == 400 + def _deposit_edit(client, json_headers, id): """Post action to edit deposit.""" From f43d52723527f638cbdc95e08ae3d8cc9cb6e24d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 3 Apr 2025 18:39:12 +0300 Subject: [PATCH 02/41] docs: replacing main file --- ...Collection - CDS Videos Publish Video.json | 103 ++++++++++++------ README.rst | 85 ++++++++++++--- 2 files changed, 137 insertions(+), 51 deletions(-) diff --git a/Bruno Collection - CDS Videos Publish Video.json b/Bruno Collection - CDS Videos Publish Video.json index dcd1fb7ec..a1aed1578 100644 --- a/Bruno Collection - CDS Videos Publish Video.json +++ b/Bruno Collection - CDS Videos Publish Video.json @@ -4,38 +4,8 @@ 
"items": [ { "type": "http", - "name": "Step 6: Get Project to Check the Flow Status", + "name": "Optional- Update the Access of the Video", "seq": 7, - "request": { - "url": "{{baseURL}}/api/deposits/project/{{project_id}}", - "method": "GET", - "headers": [ - { - "name": "content-type", - "value": "application/vnd.project.partial+json", - "enabled": true - } - ], - "params": [], - "body": { - "mode": "json", - "json": "", - "formUrlEncoded": [], - "multipartForm": [] - }, - "script": {}, - "vars": {}, - "assertions": [], - "tests": "", - "auth": { - "mode": "inherit" - } - } - }, - { - "type": "http", - "name": "Optional: Update the Access of the Video", - "seq": 6, "request": { "url": "{{baseURL}}/api/deposits/video/{{video_id}}", "method": "PUT", @@ -184,7 +154,9 @@ "formUrlEncoded": [], "multipartForm": [] }, - "script": {}, + "script": { + "res": "let data = res.body;\nbru.setEnvVar(\"main_video_key\", data.key);" + }, "vars": {}, "assertions": [], "tests": "", @@ -195,7 +167,7 @@ }, { "type": "http", - "name": "Step 5: Upload additional file", + "name": "Optional: Upload additional file", "seq": 5, "request": { "url": "{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}", @@ -227,8 +199,38 @@ }, { "type": "http", - "name": "Step 7: Publish", + "name": "Step 5: Get Video to Check the Flow Status", "seq": 8, + "request": { + "url": "{{baseURL}}/api/deposits/video/{{video_id}}", + "method": "GET", + "headers": [ + { + "name": "content-type", + "value": "application/vnd.project.partial+json", + "enabled": true + } + ], + "params": [], + "body": { + "mode": "json", + "json": "", + "formUrlEncoded": [], + "multipartForm": [] + }, + "script": {}, + "vars": {}, + "assertions": [], + "tests": "", + "auth": { + "mode": "inherit" + } + } + }, + { + "type": "http", + "name": "Step 6- Publish", + "seq": 9, "request": { "url": "{{baseURL}}/api/deposits/video/{{video_id}}/actions/publish", "method": "POST", @@ -254,9 +256,40 @@ "mode": "inherit" } } + }, + { + "type": 
"http", + "name": "Optional: Replace Main File", + "seq": 6, + "request": { + "url": "{{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}", + "method": "PUT", + "headers": [ + { + "name": "X-Invenio-File-Tags", + "value": "times_replaced=1", + "enabled": true + } + ], + "params": [], + "body": { + "mode": "json", + "json": "", + "formUrlEncoded": [], + "multipartForm": [] + }, + "script": { + "req": "const fs = require('fs');\nconst path = require('path');\n\n// File details\nconst filename = \"video_name.mp4\"; // CHANGE HERE\nconst filePath = \"video_file_path\"; // CHANGE HERE\n\n// Read the file as raw binary data\nconst fileContent = fs.readFileSync(filePath);\n\n// Set request headers\nreq.setHeader(\"Accept\", \"application/json, text/plain, */*\"); \nreq.setHeader(\"Accept-Encoding\", \"gzip, deflate, br, zstd\"); \nreq.setHeader(\"Content-Length\", fileContent.length);\n\n// Attach the file content as the request body\nreq.setBody(fileContent);\n" + }, + "vars": {}, + "assertions": [], + "tests": "", + "auth": { + "mode": "inherit" + } + } } ], - "activeEnvironmentUid": "O01m8KLYsgrkGRjOSv443", "environments": [ { "variables": [ diff --git a/README.rst b/README.rst index 376788460..14ed4d7f4 100644 --- a/README.rst +++ b/README.rst @@ -33,10 +33,11 @@ Table of Contents - `Step 2: Create a Video <#step-2-create-a-video>`_ - `Step 3: Upload the Video <#step-3-upload-the-video>`_ - `Step 4: Create a Flow <#step-4-create-a-flow>`_ - - `Step 5: (Optional) Upload Additional File <#step-5-optional-upload-additional-file>`_ + - `Optional: Upload Additional File <#optional-upload-additional-file>`_ + - `Optional: Replace the Main Video File <#optional-replace-the-main-video-file>`_ - `Optional: Update the Access of the Video <#optional-update-the-access-of-the-video>`_ - - `Step 6: Get Project to Check the Flow Status <#step-6-get-project-to-check-the-flow-status>`_ - - `Step 7: Publish Video <#step-7-publish-video>`_ + - `Step 5: Get Video to Check the Flow 
Status <#step-5-get-video-to-check-the-flow-status>`_ + - `Step 6: Publish Video <#step-6-publish-video>`_ Prerequisites @@ -228,6 +229,7 @@ If you'd like to use the pre-configured REST API collection in Bruno, ensure you - Download this `Bruno collection <./Bruno%20Collection%20-%20CDS%20Videos%20Publish%20Video.json>`_. - Open Bruno and import downloaded collection. + - Switch to **Developer Mode**. - Create an environment for the collection. - Configure the environment by adding a variable named ``baseURL``. Set its value to your API base URL (e.g., ``http://localhost:5000``). @@ -349,7 +351,7 @@ To restrict the project, add ``_access/read``: **Response:** -Created project JSON. +Created project JSON. Save ``response.body.project_id`` as ``_project_id`` for later use. Step 2: Create a Video @@ -489,7 +491,7 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the **Response:** -Created video JSON. +Created video JSON. Save ``response.body.id`` as ``video_id`` and ``response.body.metadata._buckets.deposit`` as ``bucket_id`` for later use. Step 3: Upload the Video @@ -531,7 +533,7 @@ Step 3: Upload the Video **Response:** -Uploaded video JSON. +Uploaded video JSON. Save ``response.body.version_id`` as ``main_file_version_id`` and ``response.body.key`` as ``video_key`` for later use. Step 4: Create a Flow @@ -584,10 +586,10 @@ Step 4: Create a Flow **Response:** -Created flow JSON. +Created flow JSON. If you want to replace the main video file later, save ``response.body.key`` as ``main_video_key``. -Step 5: (Optional) Upload Additional File +Optional: Upload Additional File ------------------------------------------ **Request:** @@ -622,9 +624,61 @@ Step 5: (Optional) Upload Additional File - To include the file in the body, modify the `pre-request script` in Bruno. + +Optional: Replace the Main Video File +------------------------------------------ + +**1. 
Request:** + +``PUT`` ``{{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}`` + +**Headers:** + +- ``X-Invenio-File-Tags: times_replaced=number_of_times_replaced`` + +**Parameters:** + +.. list-table:: + :header-rows: 1 + + * - **Name** + - **Type** + - **Location** + - **Description** + * - **bucket_id** + - string + - path + - ID of the bucket to upload the file. + * - **main_video_key** + - string + - path + - Key of the previously uploaded main file. + * - **file** + - file + - body + - The file to be uploaded. + + +- To include the file in the body, modify the `pre-request script` in Bruno. + +**⚠️ Important** + +You must use the exact ``key`` value from the response of the `Create a Flow <#step-4-create-a-flow>`_ request +(stored as ``main_video_key``) to overwrite the existing file when replacing the main video. + +This is required because the backend **renames the uploaded file** to distinguish it from automatically generated subformat files. +Using the original file name (``video_name``) will not work for replacement. + +Do **not** confuse this with the initial video upload request, which uses the original video file name (``video_name``). + **Response:** -Uploaded additional file JSON. +Uploaded file JSON. Save ``response.body.version_id`` as ``main_file_version_id`` and ``response.body.key`` as ``video_key`` for later use. + +**2. Request:** + +Start the flow again using the new main video file, along with the updated ``main_file_version_id`` and ``video_key``. +You can follow the same structure outlined in `Step 4 <#step-4-create-a-flow>`_. Optional: Update the Access of the Video @@ -675,12 +729,12 @@ To restrict the video, add ``_access/read``. If you want to change the access/up Updated video JSON. 
-Step 6: Get Project to Check the Flow Status +Step 5: Get Video to Check the Flow Status -------------------------------------------- **Request:** -``GET`` ``{{baseURL}}/api/deposits/project/{{project_id}}`` +``GET`` ``{{baseURL}}/api/deposits/video/{{video_id}}`` **Headers:** @@ -695,19 +749,18 @@ Step 6: Get Project to Check the Flow Status - **Type** - **Location** - **Description** - * - **project_id** + * - **video_id** - string - path - - ID of the project. + - ID of the video. **Response:** -Updated project JSON with flow status as ``state``: +Updated video JSON with flow status. You can find the flow status in ``response.body.metadata._cds.state``: .. code-block:: json { - "id": "b320568fc1264dda90a8f459be42892e", "_cds": { "state": { "file_transcode": "STARTED", @@ -718,7 +771,7 @@ Updated project JSON with flow status as ``state``: } -Step 7: Publish Video +Step 6: Publish Video ---------------------- Before publishing the video, ensure that the workflow is complete. From 9e2fa6b78fc853b421f210f5cf3ced7972512506 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Fri, 11 Apr 2025 15:15:01 +0200 Subject: [PATCH 03/41] legacy: record redirection / new table for record marcxml --- cds/modules/legacy/__init__.py | 25 +++++++ .../bf9c38b8dabd_create_legacy_branch.py | 28 ++++++++ .../f57e61d833b1_create_legacy_table.py | 39 +++++++++++ cds/modules/legacy/minters.py | 38 +++++++++++ cds/modules/legacy/models.py | 66 +++++++++++++++++++ cds/modules/legacy/redirector.py | 47 +++++++++++++ cds/modules/legacy/resolver.py | 43 ++++++++++++ setup.cfg | 5 ++ tests/unit/test_legacy_redirector.py | 47 +++++++++++++ 9 files changed, 338 insertions(+) create mode 100644 cds/modules/legacy/__init__.py create mode 100644 cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py create mode 100644 cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py create mode 100644 cds/modules/legacy/minters.py create mode 100644 
cds/modules/legacy/models.py create mode 100644 cds/modules/legacy/redirector.py create mode 100644 cds/modules/legacy/resolver.py create mode 100644 tests/unit/test_legacy_redirector.py diff --git a/cds/modules/legacy/__init__.py b/cds/modules/legacy/__init__.py new file mode 100644 index 000000000..33eb1ff2e --- /dev/null +++ b/cds/modules/legacy/__init__.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""CDS-Videos legacy migration module.""" diff --git a/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py new file mode 100644 index 000000000..e325af166 --- /dev/null +++ b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py @@ -0,0 +1,28 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. 
+ +"""Create legacy branch""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'bf9c38b8dabd' +down_revision = None +branch_labels = ("legacy",) +depends_on = '35c1075e6360' + + +def upgrade(): + """Upgrade database.""" + pass + + +def downgrade(): + """Downgrade database.""" + pass diff --git a/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py new file mode 100644 index 000000000..f0e84db46 --- /dev/null +++ b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py @@ -0,0 +1,39 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Create legacy table.""" + +from alembic import op +import sqlalchemy as sa +import sqlalchemy_utils +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'f57e61d833b1' +down_revision = 'bf9c38b8dabd' +branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('cds_migration_legacy_records', + sa.Column('id', sqlalchemy_utils.types.uuid.UUIDType(), nullable=False), + sa.Column('migrated_record_object_uuid', sqlalchemy_utils.types.uuid.UUIDType(), nullable=True, comment='The uuid of the record metadata of the latest record metadata at the time of the migration.'), + sa.Column('legacy_recid', sa.Integer(), nullable=True, comment='The record id in the legacy system'), + sa.Column('json', sa.JSON().with_variant(postgresql.JSONB(none_as_null=True, astext_type=sa.Text()), 'postgresql'), nullable=True, comment='The extracted information of the legacy record before any transformation.'), + sa.PrimaryKeyConstraint('id', name=op.f('pk_cds_migration_legacy_records')) + ) + # ### end Alembic commands ### + + +def downgrade(): + """Downgrade database.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('cds_migration_legacy_records') + # ### end Alembic commands ### diff --git a/cds/modules/legacy/minters.py b/cds/modules/legacy/minters.py new file mode 100644 index 000000000..46103a66f --- /dev/null +++ b/cds/modules/legacy/minters.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Minters.""" + +from invenio_pidstore.models import PersistentIdentifier, PIDStatus + + +def legacy_recid_minter(legacy_recid, uuid): + """Legacy_recid minter.""" + PersistentIdentifier.create( + pid_type="lrecid", + pid_value=legacy_recid, + object_type="rec", + object_uuid=uuid, + status=PIDStatus.REGISTERED, + ) diff --git a/cds/modules/legacy/models.py b/cds/modules/legacy/models.py new file mode 100644 index 000000000..a16caa57b --- /dev/null +++ b/cds/modules/legacy/models.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ +"""CDS Migration models.""" + +import json +import uuid + +from invenio_db import db +from sqlalchemy import Column, Integer, String +from sqlalchemy.dialects import postgresql +from sqlalchemy_utils.types import UUIDType + + +class CDSMigrationLegacyRecord(db.Model): + """Store the extracted legacy information for a specific record.""" + + __tablename__ = "cds_migration_legacy_records" + + id = db.Column( + UUIDType, + primary_key=True, + default=uuid.uuid4, + ) + migrated_record_object_uuid = Column( + UUIDType, + nullable=True, + comment="The uuid of the migrated record metadata.", + ) + legacy_recid = Column( + Integer, nullable=True, comment="The record id in the legacy system" + ) + json = db.Column( + db.JSON().with_variant( + postgresql.JSONB(none_as_null=True), + "postgresql", + ), + default=lambda: dict(), + nullable=True, + comment="The extracted information of the legacy record before any transformation.", + ) + + def __repr__(self): + """Representation of the model.""" + return f"<CDSMigrationLegacyRecord legacy_recid={self.legacy_recid}>" diff --git a/cds/modules/legacy/redirector.py b/cds/modules/legacy/redirector.py new file mode 100644 index 000000000..a6a2c6e22 --- /dev/null +++ b/cds/modules/legacy/redirector.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA.
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Redirector functions and rules.""" + +from flask import Blueprint, current_app, abort, redirect +from sqlalchemy.orm.exc import NoResultFound + +from .resolver import get_pid_by_legacy_recid + +HTTP_MOVED_PERMANENTLY = 301 + +blueprint = Blueprint( + "cds_legacy", __name__, template_folder="templates", url_prefix="/legacy" +) + +@blueprint.route("/record/<legacy_id>", strict_slashes=False) +def legacy_record_redirect(legacy_id): + """Redirect legacy recid.""" + try: + pid = get_pid_by_legacy_recid(legacy_id) + except NoResultFound: + abort(404) + + url_path = f"{current_app.config['SITE_URL']}/record/{pid.pid_value}" + return redirect(url_path, HTTP_MOVED_PERMANENTLY) \ No newline at end of file diff --git a/cds/modules/legacy/resolver.py b/cds/modules/legacy/resolver.py new file mode 100644 index 000000000..eb2c6a1c2 --- /dev/null +++ b/cds/modules/legacy/resolver.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA.
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Resolver.""" + +from invenio_pidstore.models import PersistentIdentifier + + +def get_pid_by_legacy_recid(legacy_recid): + """Get the record's recid PID from its legacy recid.""" + # Get the object uuid from pidstore + recid = PersistentIdentifier.query.filter_by( + pid_value=legacy_recid, object_type="rec", pid_type="lrecid" + ).one() + + # Use the object uuid to get the pid value + record_pid = PersistentIdentifier.query.filter_by( + object_uuid=recid.object_uuid, object_type="rec", pid_type="recid" + ).one() + + return record_pid + diff --git a/setup.cfg b/setup.cfg index 7e2567e66..c8754551f 100644 --- a/setup.cfg +++ b/setup.cfg @@ -176,11 +176,13 @@ invenio_base.blueprints = cds_theme = cds.modules.theme.views:blueprint cds_redirector = cds.modules.redirector.views:blueprint cern_oauth = cds.modules.oauthclient.cern_openid:cern_openid_blueprint + cds_migration = cds.modules.legacy.redirector:blueprint invenio_config.module = cds = cds.config invenio_db.alembic = cds_announcements = cds.modules.announcements:alembic invenio_flows = cds.modules.flows:alembic + legacy = cds.modules.legacy:alembic invenio_jsonschemas.schemas = deposit = cds.modules.deposit.schemas record = cds.modules.records.schemas @@ -195,6 +197,7 @@ invenio_pidstore.minters = cds_report_number = cds.modules.records.minters:report_number_minter cds_recid = cds.modules.records.minters:cds_record_minter deposit = cds.modules.invenio_deposit.minters:deposit_minter + legacy = cds.modules.legacy.minters:legacy_recid_minter invenio_search.mappings = records = cds.modules.records.mappings deposits = cds.modules.deposit.mappings @@ -222,6 +225,8 @@ invenio_oauth2server.scopes = deposit_actions = cds.modules.invenio_deposit.scopes:actions_scope invenio_access.actions = deposit_admin_access =
cds.modules.invenio_deposit.permissions:action_admin_access +invenio_db.models = + cds_migration_models = cds.modules.legacy.models [bdist_wheel] universal = 1 diff --git a/tests/unit/test_legacy_redirector.py b/tests/unit/test_legacy_redirector.py new file mode 100644 index 000000000..5ad47d74b --- /dev/null +++ b/tests/unit/test_legacy_redirector.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# Invenio-RDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. + + +from io import BytesIO + +import pytest +from invenio_pidstore.models import PersistentIdentifier +from cds.modules.legacy.minters import legacy_recid_minter +from invenio_db import db + +LEGACY_RECID = "123456" +LEGACY_RECID_PID_TYPE = "lrecid" + +def test_legacy_record_redirection(app, video_published): + """Test legacy redirection mechanism.""" + + with app.test_client() as client: + # Fetch published record and its UUID + recid_pid, _ = video_published.fetch_published() + record_uuid = str(recid_pid.object_uuid) + + # Mint legacy PID + legacy_recid_minter(LEGACY_RECID, record_uuid) + db.session.commit() + + # Expected redirection target + expected_location = f"{app.config['SITE_URL']}/record/{recid_pid.pid_value}" + + # Test redirection from legacy recid + url = f"/legacy/record/{LEGACY_RECID}" + response = client.get(url, follow_redirects=False) + assert response.status_code == 301 + assert response.location == expected_location + + # Optionally follow the redirect if the final destination is also handled + response = client.get(url, follow_redirects=True) + assert response.status_code == 200 + + # Test not found for unknown recid + response = client.get("/legacy/record/654321") + assert response.status_code == 404 + From f73e56ac94e2f20e51118d986706a0cc39677486 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Mon, 28 Apr 2025 14:39:26 +0200 Subject: [PATCH 04/41] 
subtitles: fix renaming on publish --- cds/modules/deposit/api.py | 10 +++++++++- tests/unit/test_video.py | 29 ++++++++++++++++------------- 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py index 90504749f..5cc9bed37 100644 --- a/cds/modules/deposit/api.py +++ b/cds/modules/deposit/api.py @@ -862,7 +862,15 @@ def _rename_subtitles(self): subtitle_obj_key = "{}_{}.vtt".format( self["report_number"][0], match.group("iso_lang") ) - subtitle_obj.key = subtitle_obj_key + obj = ObjectVersion.create( + bucket=subtitle_obj.bucket, + key=subtitle_obj_key, + _file_id=subtitle_obj.file_id, + ) + # copy tags to the newly created object version + for tag in subtitle_obj.tags: + ObjectVersionTag.create_or_update(obj, tag.key, tag.value) + subtitle_obj.remove() def _rename_master_file(self, master_file): """Rename master file.""" diff --git a/tests/unit/test_video.py b/tests/unit/test_video.py index b9c6217a5..a51980faa 100644 --- a/tests/unit/test_video.py +++ b/tests/unit/test_video.py @@ -514,7 +514,6 @@ def test_video_keywords(es, api_project, keyword_1, keyword_2, users): @mock.patch("flask_login.current_user", mock_current_user) -@pytest.mark.skip(reason="TO BE CHECKED") def test_deposit_vtt_tags(api_app, db, api_project, users): """Test VTT tag generation.""" project, video_1, video_2 = api_project @@ -554,7 +553,7 @@ def test_deposit_vtt_tags(api_app, db, api_project, users): video_1 = deposit_video_resolver(video_1_depid) ObjectVersion.delete(bucket=video_1._bucket, key=obj.key) obj2 = ObjectVersion.create( - video_1._bucket, key="test_en.vtt", stream=BytesIO(b"hello") + video_1._bucket, key="new_fr.vtt", stream=BytesIO(b"hello") ) # publish again the video @@ -567,7 +566,7 @@ def test_deposit_vtt_tags(api_app, db, api_project, users): content_type="vtt", media_type="subtitle", context_type="subtitle", - language="en", + language="fr", ) # edit a re-published video @@ -717,13 +716,17 @@ def 
test_video_name_after_publish(api_app, db, api_project, users): def check_object_tags(obj, video, **tags): """Check tags on an ObjectVersion (i.e. on DB and deposit/record dump).""" assert obj.get_tags() == tags - for dump in [ - [d for d in files if d["key"] == obj.key][0] - for files in [video._get_files_dump(), video.fetch_published()[1]["_files"]] - ]: - assert dump["content_type"] == tags["content_type"] - assert dump["context_type"] == tags["context_type"] - assert dump["media_type"] == tags["media_type"] - assert dump["tags"] == { - t: tags[t] for t in tags if t not in ["context_type", "media_type"] - } + + file_sources = [ + video._get_files_dump(), + video.fetch_published()[1]["_files"] + ] + for files in file_sources: + matching_files = [d for d in files if d["key"] == obj.key] + for dump in matching_files: + assert dump["content_type"] == tags["content_type"] + assert dump["context_type"] == tags["context_type"] + assert dump["media_type"] == tags["media_type"] + assert dump["tags"] == { + t: tags[t] for t in tags if t not in ["context_type", "media_type"] + } From dd12aa10721a8c8114bfb2d8f06bc31f1b38e01f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Tue, 13 May 2025 10:48:00 +0200 Subject: [PATCH 05/41] schema: add legacy_recid and contributor roles --- .../records/videos/video/video-v1.0.0.json | 3 ++ .../videos/project/project-v1.0.0.json | 3 ++ .../records/videos/video/video-v1.0.0.json | 7 ++++ .../json/cds_deposit/forms/project.json | 12 +++++++ .../static/json/cds_deposit/forms/video.json | 12 +++++++ .../fixtures/data/pages/guides/search.html | 6 ++++ .../records/videos/video/video-v1.0.0.json | 3 ++ .../videos/project/project-v1.0.0.json | 3 ++ .../records/videos/video/video-v1.0.0.json | 7 ++++ .../records/serializers/schemas/datacite.py | 33 +++++++++++-------- .../records/serializers/schemas/video.py | 1 + 11 files changed, 76 insertions(+), 14 deletions(-) diff --git 
a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index ae4cbfb87..7775e85ae 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -228,6 +228,9 @@ "recid": { "type": "double" }, + "legacy_recid": { + "type": "double" + }, "license": { "properties": { "license": { diff --git a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json index 2e91fa703..0729bb117 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json @@ -142,6 +142,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -150,7 +151,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index 8c84e84bd..2d67fc5ab 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -62,6 +62,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -70,7 +71,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", @@ -696,6 +699,10 @@ "type": "number", "description": "Invenio record identifier (integer)." }, + "legacy_recid": { + "type": "number", + "description": "Legacy record identifier (integer). 
Kept for auditing reasons." + }, "original_source": { "type": "string" }, diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/project.json b/cds/modules/deposit/static/json/cds_deposit/forms/project.json index 254ffeb66..b80c100fe 100644 --- a/cds/modules/deposit/static/json/cds_deposit/forms/project.json +++ b/cds/modules/deposit/static/json/cds_deposit/forms/project.json @@ -160,6 +160,10 @@ "value": "Co-Producer", "name": "Co-Producer" }, + { + "value": "ContactPerson", + "name": "Contact Person" + }, { "value": "Creator", "name": "Creator" @@ -192,10 +196,18 @@ "value": "Producer", "name": "Producer" }, + { + "value": "RelatedPerson", + "name": "Related Person" + }, { "value": "Reporter", "name": "Reporter" }, + { + "value": "ResearchGroup", + "name": "Research Group" + }, { "value": "Screenwriter", "name": "Screenwriter" diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/video.json b/cds/modules/deposit/static/json/cds_deposit/forms/video.json index c90b9884b..981de0f3e 100644 --- a/cds/modules/deposit/static/json/cds_deposit/forms/video.json +++ b/cds/modules/deposit/static/json/cds_deposit/forms/video.json @@ -292,6 +292,10 @@ "value": "Co-Producer", "name": "Co-Producer" }, + { + "value": "ContactPerson", + "name": "Contact Person" + }, { "value": "Creator", "name": "Creator" @@ -324,10 +328,18 @@ "value": "Producer", "name": "Producer" }, + { + "value": "RelatedPerson", + "name": "Related Person" + }, { "value": "Reporter", "name": "Reporter" }, + { + "value": "ResearchGroup", + "name": "Research Group" + }, { "value": "Screenwriter", "name": "Screenwriter" diff --git a/cds/modules/fixtures/data/pages/guides/search.html b/cds/modules/fixtures/data/pages/guides/search.html index af221fe89..2bc11082d 100644 --- a/cds/modules/fixtures/data/pages/guides/search.html +++ b/cds/modules/fixtures/data/pages/guides/search.html @@ -371,6 +371,8 @@

Contributor roles (CV)

  • Comments by
  • +
  • Contact Person
  • +
  • Co-Producer
  • Creator
  • @@ -389,8 +391,12 @@

    Contributor roles (CV)

  • Producer
  • +
  • Related Person
  • +
  • Reporter
  • +
  • Research Group
  • +
  • Screenwriter
  • Speaker
  • diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index d2e230a8c..c680df408 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -234,6 +234,9 @@ "recid": { "type": "double" }, + "legacy_recid": { + "type": "double" + }, "doi": { "type": "text" }, diff --git a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json index 756fc1b09..107b09231 100644 --- a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json @@ -139,6 +139,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -147,7 +148,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index 5ace8d1d3..f77ec50b8 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -62,6 +62,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -70,7 +71,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", @@ -631,6 +634,10 @@ "type": "number", "description": "Invenio record identifier (integer)." }, + "legacy_recid": { + "type": "number", + "description": "Legacy record identifier (integer). Kept for auditing reasons." 
+ }, "original_source": { "type": "string" }, diff --git a/cds/modules/records/serializers/schemas/datacite.py b/cds/modules/records/serializers/schemas/datacite.py index 3c991b249..44776fe03 100644 --- a/cds/modules/records/serializers/schemas/datacite.py +++ b/cds/modules/records/serializers/schemas/datacite.py @@ -53,6 +53,7 @@ class DataCiteSchemaV1(Schema): """DataCite schema v1.""" creators = fields.Method("get_creators") + contributors = fields.Method("get_contributors") dates = fields.Method("get_dates") descriptions = fields.Method("get_descriptions") identifier = fields.Nested(IdentifierSchema, attribute="metadata.doi") @@ -101,23 +102,27 @@ def get_creators(self, obj): """Get creators.""" items = [] for item in obj["metadata"].get("contributors", []): - items.append( - { - "creatorName": item.get("name", ""), - } + if item.get("role", "") != "ResearchGroup": + items.append( + { + "creatorName": item.get("name", ""), + } ) return items - # def get_contributors(self, obj): - # """Get contributors.""" - # items = [] - # for item in obj['metadata'].get('contributors', []): - # items.append({ - # 'contributorType': item.get('role', ''), - # 'contributorName': item.get('name', ''), - # # FIXME nameIdentifier and nameIdentifierScheme, ... ? - # }) - # return items + def get_contributors(self, obj): + """Get contributors.""" + items = [] + for item in obj['metadata'].get('contributors', []): + if item.get("role", "") == "ResearchGroup": + items.append( + { + 'contributorType': item.get('role', ''), + 'contributorName': item.get('name', ''), + # FIXME nameIdentifier and nameIdentifierScheme, ... ? 
+ } + ) + return items def get_publication_year(self, obj): """Get publication year.""" diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py index c83e2e83a..79104c5e1 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py @@ -142,6 +142,7 @@ class VideoSchema(StrictKeysSchema): note = fields.Str() publication_date = fields.Str() recid = fields.Number() + legacy_recid =fields.Number() related_links = fields.Nested(RelatedLinksSchema, many=True) report_number = fields.List(fields.Str, many=True) schema = fields.Str(attribute="$schema", data_key="$schema") From ed07da0d8dfcef60f23544a134b07450cf71cf7e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 15 May 2025 16:19:48 +0200 Subject: [PATCH 06/41] schema: add _curation and alternate_identifiers --- .../records/videos/video/video-v1.0.0.json | 21 +++++++++++++ .../records/videos/video/video-v1.0.0.json | 31 +++++++++++++++++++ .../records/videos/video/video-v1.0.0.json | 21 +++++++++++++ .../records/videos/video/video-v1.0.0.json | 31 +++++++++++++++++++ .../records/serializers/schemas/common.py | 13 ++++++++ .../records/serializers/schemas/video.py | 8 ++++- 6 files changed, 124 insertions(+), 1 deletion(-) diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index 7775e85ae..edf1ce11d 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -336,6 +336,27 @@ "type": "keyword" } } + }, + "alternate_identifiers": { + "properties": { + "scheme": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "_curation": { + "type": "object", + "properties": { + "legacy_report_number": { + "type": 
"keyword" + }, + "department": { + "type": "keyword" + } + } } } } diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index 2d67fc5ab..b5996d21a 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -193,6 +193,23 @@ "description": "List of identifiers on external systems.", "title": "External identifiers" }, + "alternate_identifiers": { + "items": { + "properties": { + "scheme": { + "title": "Scheme of the identifier (Vocabulary)", + "type": "string" + }, + "value": { + "title": "Value of the identifier", + "type": "string" + } + } + }, + "required": ["value", "scheme"], + "title": "List of alternate identifiers of the record", + "type": "array" + }, "$schema": { "type": "string" }, @@ -708,6 +725,20 @@ }, "_project_id": { "type": "string" + }, + "_curation": { + "properties": { + "legacy_report_number": { + "title": "Legacy record report number.", + "type": "string" + }, + "department": { + "title": "CERN department.", + "type": "string" + } + }, + "title": "Fields that needs curation.", + "type": "object" } } } \ No newline at end of file diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index c680df408..e61b96a4d 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -334,6 +334,27 @@ }, "publication_date": { "type": "text" + }, + "alternate_identifiers": { + "properties": { + "scheme": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "_curation": { + "type": "object", + "properties": { + "legacy_report_number": { + "type": "keyword" + }, + "department": { + "type": "keyword" + } 
+ } } } } diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index f77ec50b8..d9c5453eb 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -204,6 +204,23 @@ "description": "List of identifiers on external systems.", "title": "External identifiers" }, + "alternate_identifiers": { + "items": { + "properties": { + "scheme": { + "title": "Scheme of the identifier (Vocabulary)", + "type": "string" + }, + "value": { + "title": "Value of the identifier", + "type": "string" + } + } + }, + "required": ["value", "scheme"], + "title": "List of alternate identifiers of the record", + "type": "array" + }, "subject": { "additionalProperties": false, "description": "Subject.", @@ -643,6 +660,20 @@ }, "_project_id": { "type": "string" + }, + "_curation": { + "properties": { + "legacy_report_number": { + "title": "Legacy record report number.", + "type": "string" + }, + "department": { + "title": "CERN department.", + "type": "string" + } + }, + "title": "Fields that needs curation.", + "type": "object" } }, "title": "CDS Base Record Schema v1.0.0" diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py index bfae055f4..c103889a8 100644 --- a/cds/modules/records/serializers/schemas/common.py +++ b/cds/modules/records/serializers/schemas/common.py @@ -155,3 +155,16 @@ class ExternalSystemIdentifiersField(StrictKeysSchema): value = fields.Str() schema = fields.Str() + + +class AlternateIdentifiersSchema(StrictKeysSchema): + """Field alternate_identifiers.""" + + value = fields.Str() + schema = fields.Str() + + +class CurationSchema(StrictKeysSchema): + """Curation schema.""" + legacy_report_number = fields.Str() + department = fields.Str() diff --git a/cds/modules/records/serializers/schemas/video.py 
b/cds/modules/records/serializers/schemas/video.py index 79104c5e1..34e59c46b 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py @@ -25,8 +25,10 @@ from ..fields.datetime import DateString from .common import ( AccessSchema, + AlternateIdentifiersSchema, BucketSchema, ContributorSchema, + CurationSchema, DepositSchema, ExternalSystemIdentifiersField, KeywordsSchema, @@ -132,6 +134,9 @@ class VideoSchema(StrictKeysSchema): external_system_identifiers = fields.Nested( ExternalSystemIdentifiersField, many=True ) + alternate_identifiers = fields.Nested( + AlternateIdentifiersSchema, many=True + ) featured = fields.Boolean() internal_note = fields.Str() internal_categories = fields.Raw() @@ -149,7 +154,8 @@ class VideoSchema(StrictKeysSchema): title = fields.Nested(TitleSchema, required=True) translations = fields.Nested(TranslationsSchema, many=True) type = fields.Str() - vr = fields.Boolean() + vr = fields.Boolean(), + _curation = fields.Nested(CurationSchema) # Preservation fields location = fields.Str() From d475db1260f7f11148882c8cde494ea023183657 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Mon, 26 May 2025 15:27:24 +0200 Subject: [PATCH 07/41] schema: add additonal titles and descriptions --- .../records/videos/video/video-v1.0.0.json | 48 +++++ .../records/videos/video/video-v1.0.0.json | 194 +++++++++++++++++- .../records/videos/video/video-v1.0.0.json | 48 +++++ .../records/videos/video/video-v1.0.0.json | 194 +++++++++++++++++- .../records/serializers/schemas/common.py | 29 ++- .../records/serializers/schemas/video.py | 18 +- 6 files changed, 522 insertions(+), 9 deletions(-) diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index edf1ce11d..74c0b78f1 100644 --- 
a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -355,6 +355,54 @@ }, "department": { "type": "keyword" + }, + "volumes": { + "type": "text" + } + } + }, + "additional_titles": { + "type": "object", + "properties": { + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "additional_descriptions": { + "type": "object", + "properties": { + "description": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "related_identifiers": { + "type": "object", + "properties": { + "identifier": { + "type": "text" + }, + "scheme": { + "type": "keyword" + }, + "relation_type": { + "type": "keyword" + }, + "resource_type": { + "type": "keyword" } } } diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index b5996d21a..79d22a6e7 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -198,7 +198,8 @@ "properties": { "scheme": { "title": "Scheme of the identifier (Vocabulary)", - "type": "string" + "type": "string", + "enum": ["URL", "DOI", "CDS"] }, "value": { "title": "Value of the identifier", @@ -735,10 +736,201 @@ "department": { "title": "CERN department.", "type": "string" + }, + "volumes": { + "title": "Volume list for this record.", + "type": "array", + "items": { + "type": "string" + } } }, "title": "Fields that needs curation.", "type": "object" + }, + "additional_titles": { + "description": "Additional record titles.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": { + "description": "Additional title of the record.", + "type": 
"string" + }, + "type": { + "type": "string", + "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"] + }, + "lang": { + "type": "string", + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } + } + } + }, + "additional_descriptions": { + "description": "Additional descriptions for the record.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": { + "type": "string", + "description": "Descriptive content." + }, + "type": { + "type": "string", + "enum": [ + "Abstract", + "Methods", + "Other", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo" + ] + }, + "lang": { + "type": "string", + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } + } + } + }, + "related_identifiers": { + "type": "array", + "items": { + "type": "object", + "required": ["identifier", "scheme", "relation_type"], + "additionalProperties": false, + "properties": { + "identifier": { + "type": "string", + "description": "The actual identifier (e.g., URL or DOI)." + }, + "scheme": { + "type": "string", + "enum": ["URL", "DOI", "CDS", "Indico"], + "description": "The scheme describing the identifier type." 
+ }, + "relation_type": { + "type": "string", + "enum": ["IsPartOf", "IsVariantFormOf"], + "description": "Describes the relationship with the current record." + }, + "resource_type": { + "type": "string", + "enum": ["Event", "ConferencePaper", "Report"], + "description": "Type of the related resource." + } + } + } } } } \ No newline at end of file diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index e61b96a4d..8fca391b5 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -353,6 +353,54 @@ }, "department": { "type": "keyword" + }, + "volumes": { + "type": "text" + } + } + }, + "additional_titles": { + "type": "object", + "properties": { + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "additional_descriptions": { + "type": "object", + "properties": { + "description": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "related_identifiers": { + "type": "object", + "properties": { + "identifier": { + "type": "text" + }, + "scheme": { + "type": "keyword" + }, + "relation_type": { + "type": "keyword" + }, + "resource_type": { + "type": "keyword" } } } diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index d9c5453eb..a70554de4 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -209,7 +209,8 @@ "properties": { "scheme": { "title": "Scheme of the identifier (Vocabulary)", - "type": "string" + "type": "string", + "enum": ["URL", "DOI", "CDS"] }, "value": { "title": "Value of the identifier", @@ -670,10 +671,201 @@ 
"department": { "title": "CERN department.", "type": "string" + }, + "volumes": { + "title": "Volume list for this record.", + "type": "array", + "items": { + "type": "string" + } } }, "title": "Fields that needs curation.", "type": "object" + }, + "additional_titles": { + "description": "Additional record titles.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": { + "description": "Additional title of the record.", + "type": "string" + }, + "type": { + "type": "string", + "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"] + }, + "lang": { + "type": "string", + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } + } + } + }, + "additional_descriptions": { + "description": "Additional descriptions for the record.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": { + "type": "string", + "description": "Descriptive content." 
+ }, + "type": { + "type": "string", + "enum": [ + "Abstract", + "Methods", + "Other", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo" + ] + }, + "lang": { + "type": "string", + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } + } + } + }, + "related_identifiers": { + "type": "array", + "items": { + "type": "object", + "required": ["identifier", "scheme", "relation_type"], + "additionalProperties": false, + "properties": { + "identifier": { + "type": "string", + "description": "The actual identifier (e.g., URL or DOI)." + }, + "scheme": { + "type": "string", + "enum": ["URL", "DOI", "CDS", "Indico"], + "description": "The scheme describing the identifier type." + }, + "relation_type": { + "type": "string", + "enum": ["IsPartOf", "IsVariantFormOf"], + "description": "Describes the relationship with the current record." + }, + "resource_type": { + "type": "string", + "enum": ["Event", "ConferencePaper", "Report"], + "description": "Type of the related resource." 
+ } + } + } } }, "title": "CDS Base Record Schema v1.0.0" diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py index c103889a8..3764a75b4 100644 --- a/cds/modules/records/serializers/schemas/common.py +++ b/cds/modules/records/serializers/schemas/common.py @@ -160,11 +160,36 @@ class ExternalSystemIdentifiersField(StrictKeysSchema): class AlternateIdentifiersSchema(StrictKeysSchema): """Field alternate_identifiers.""" - value = fields.Str() - schema = fields.Str() + value = fields.Str(required=True) + scheme = fields.Str(required=True) class CurationSchema(StrictKeysSchema): """Curation schema.""" + legacy_report_number = fields.Str() department = fields.Str() + volumes = fields.List(fields.Str()) + + +class AdditionalTitlesSchema(Schema): + """Additional titles schema.""" + + title = fields.Str() + type = fields.Str() + lang = fields.Str() + + +class AdditionalDescriptionsSchema(Schema): + """Additional descriptions schema.""" + + description = fields.Str() + type = fields.Str() + lang = fields.Str() + + +class RelatedIdentifiersSchema(Schema): + identifier = fields.Str(required=True) + scheme = fields.Str(required=True) + relation_type = fields.Str(required=True) + resource_type = fields.Str() \ No newline at end of file diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py index 34e59c46b..dda00f1e0 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py @@ -25,6 +25,8 @@ from ..fields.datetime import DateString from .common import ( AccessSchema, + AdditionalTitlesSchema, + AdditionalDescriptionsSchema, AlternateIdentifiersSchema, BucketSchema, ContributorSchema, @@ -34,6 +36,7 @@ KeywordsSchema, LicenseSchema, OaiSchema, + RelatedIdentifiersSchema, RelatedLinksSchema, StrictKeysSchema, TitleSchema, @@ -134,9 +137,6 @@ class VideoSchema(StrictKeysSchema): external_system_identifiers 
= fields.Nested( ExternalSystemIdentifiersField, many=True ) - alternate_identifiers = fields.Nested( - AlternateIdentifiersSchema, many=True - ) featured = fields.Boolean() internal_note = fields.Str() internal_categories = fields.Raw() @@ -154,9 +154,17 @@ class VideoSchema(StrictKeysSchema): title = fields.Nested(TitleSchema, required=True) translations = fields.Nested(TranslationsSchema, many=True) type = fields.Str() - vr = fields.Boolean(), + vr = fields.Boolean() _curation = fields.Nested(CurationSchema) - + additional_titles = fields.List(fields.Nested(AdditionalTitlesSchema)) + additional_descriptions = fields.List(fields.Nested(AdditionalDescriptionsSchema)) + alternate_identifiers = fields.Nested( + AlternateIdentifiersSchema, many=True + ) + related_identifiers = fields.Nested( + RelatedIdentifiersSchema, many=True + ) + # Preservation fields location = fields.Str() original_source = fields.Str() From 5279bc775bd8b205e8a66d88df1bdd5f53d9863a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Tue, 10 Jun 2025 12:53:24 +0300 Subject: [PATCH 08/41] schema: add tags and additional languages --- .../records/videos/video/video-v1.0.0.json | 32 ++ .../videos/video/definitions-v1.0.0.json | 57 ++++ .../records/videos/video/video-v1.0.0.json | 286 +++++------------- .../records/videos/video/video-v1.0.0.json | 32 ++ .../videos/video/definitions-v1.0.0.json | 57 ++++ .../records/videos/video/video-v1.0.0.json | 286 +++++------------- .../records/serializers/schemas/common.py | 11 + .../records/serializers/schemas/video.py | 2 + 8 files changed, 337 insertions(+), 426 deletions(-) create mode 100644 cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json create mode 100644 cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json 
b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index 74c0b78f1..4c25f0a66 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -358,6 +358,32 @@ }, "volumes": { "type": "text" + }, + "physical_location": { + "type": "text" + }, + "physical_medium": { + "type": "text" + }, + "internal_note": { + "type": "text" + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "type": "text" + }, + "336": { + "type": "text" + }, + "583": { + "type": "text" + }, + "306": { + "type": "text" + } + } } } }, @@ -405,6 +431,12 @@ "type": "keyword" } } + }, + "collections": { + "type": "text" + }, + "additional_languages": { + "type": "text" } } } diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json new file mode 100644 index 000000000..e1c217180 --- /dev/null +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json @@ -0,0 +1,57 @@ +{ + "languages": { + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } +} diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index 79d22a6e7..975177f03 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ 
b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -462,59 +462,7 @@ }, "language": { "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string", "description": "A language of the resource." }, @@ -584,59 +532,7 @@ "language": { "description": "A language of the resource.", "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string" }, "license": { @@ -743,6 +639,60 @@ "items": { "type": "string" } + }, + "physical_location": { + "title": "Tag 852 physical location.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_medium": { + "title": "Tag 340 physical medium.", + "type": "array", + "items": { + "type": "string" + } + }, + "internal_note": { + "title": "Tag 595 internal note.", + "type": "array", + "items": { + "type": "string" + } + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "title": "Tag 964.", + "type": "array", + "items": { + "type": "string" + } + }, + "336": { + "title": "Tag 336.", + "type": "array", + "items": { + "type": "string" + } + }, + "583": { + "title": 
"Tag 583.", + "type": "array", + "items": { + "type": "string" + } + }, + "306": { + "title": "Tag 306.", + "type": "array", + "items": { + "type": "string" + } + } + } } }, "title": "Fields that needs curation.", @@ -765,59 +715,7 @@ }, "lang": { "type": "string", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ] + "$ref": "definitions-v1.0.0.json#/languages" } } } @@ -846,59 +744,7 @@ }, "lang": { "type": "string", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ] + "$ref": "definitions-v1.0.0.json#/languages" } } } @@ -926,11 +772,25 @@ }, "resource_type": { "type": "string", - "enum": ["Event", "ConferencePaper", "Report"], + "enum": ["Event", "ConferencePaper", "Report", "Book"], "description": "Type of the related resource." 
} } } + }, + "collections": { + "items": { + "type": "string" + }, + "type": "array" + }, + "additional_languages": { + "description": "Additional languages for the record.", + "type": "array", + "items": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } } } } \ No newline at end of file diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index 8fca391b5..9129b759b 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -356,6 +356,32 @@ }, "volumes": { "type": "text" + }, + "physical_location": { + "type": "text" + }, + "physical_medium": { + "type": "text" + }, + "internal_note": { + "type": "text" + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "type": "text" + }, + "336": { + "type": "text" + }, + "583": { + "type": "text" + }, + "306": { + "type": "text" + } + } } } }, @@ -403,6 +429,12 @@ "type": "keyword" } } + }, + "collections": { + "type": "text" + }, + "additional_languages": { + "type": "text" } } } diff --git a/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json new file mode 100644 index 000000000..e1c217180 --- /dev/null +++ b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json @@ -0,0 +1,57 @@ +{ + "languages": { + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } +} diff 
--git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index a70554de4..2e47eb108 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -421,59 +421,7 @@ }, "language": { "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string", "description": "A language of the resource." }, @@ -559,59 +507,7 @@ "language": { "description": "A language of the resource.", "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string" }, "accelerator_experiment": { @@ -678,6 +574,60 @@ "items": { "type": "string" } + }, + "physical_location": { + "title": "Tag 852 physical location.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_medium": { + "title": "Tag 340 physical medium.", + "type": "array", + "items": { + "type": "string" + } + }, + "internal_note": { + "title": "Tag 595 internal note.", + "type": "array", + "items": { + "type": "string" + } + }, + 
"legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "title": "Tag 964.", + "type": "array", + "items": { + "type": "string" + } + }, + "336": { + "title": "Tag 336.", + "type": "array", + "items": { + "type": "string" + } + }, + "583": { + "title": "Tag 583.", + "type": "array", + "items": { + "type": "string" + } + }, + "306": { + "title": "Tag 306.", + "type": "array", + "items": { + "type": "string" + } + } + } } }, "title": "Fields that needs curation.", @@ -700,59 +650,7 @@ }, "lang": { "type": "string", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ] + "$ref": "definitions-v1.0.0.json#/languages" } } } @@ -781,59 +679,7 @@ }, "lang": { "type": "string", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ] + "$ref": "definitions-v1.0.0.json#/languages" } } } @@ -861,11 +707,25 @@ }, "resource_type": { "type": "string", - "enum": ["Event", "ConferencePaper", "Report"], + "enum": ["Event", "ConferencePaper", "Report", "Book"], "description": "Type of the related resource." 
} } } + }, + "collections": { + "items": { + "type": "string" + }, + "type": "array" + }, + "additional_languages": { + "description": "Additional languages for the record.", + "type": "array", + "items": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } } }, "title": "CDS Base Record Schema v1.0.0" diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py index 3764a75b4..cccc3ab32 100644 --- a/cds/modules/records/serializers/schemas/common.py +++ b/cds/modules/records/serializers/schemas/common.py @@ -164,12 +164,23 @@ class AlternateIdentifiersSchema(StrictKeysSchema): scheme = fields.Str(required=True) +class LegacyMARCFieldsSchema(Schema): + tag_964 = fields.List(fields.Str(), data_key="964") + tag_336 = fields.List(fields.Str(), data_key="336") + tag_583 = fields.List(fields.Str(), data_key="583") + tag_306 = fields.List(fields.Str(), data_key="306") + + class CurationSchema(StrictKeysSchema): """Curation schema.""" legacy_report_number = fields.Str() department = fields.Str() volumes = fields.List(fields.Str()) + physical_location = fields.List(fields.Str()) + physical_medium = fields.List(fields.Str()) + internal_note = fields.List(fields.Str()) + legacy_marc_fields = fields.Nested(LegacyMARCFieldsSchema) class AdditionalTitlesSchema(Schema): diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py index dda00f1e0..88fcb3d8c 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py @@ -164,6 +164,8 @@ class VideoSchema(StrictKeysSchema): related_identifiers = fields.Nested( RelatedIdentifiersSchema, many=True ) + collections = fields.List(fields.Str, many=True) + additional_languages = fields.List(fields.Str, many=True) # Preservation fields location = fields.Str() From b916411861024b7cff2d2621a8941f81fc85954c Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Wed, 25 Jun 2025 17:38:06 +0200 Subject: [PATCH 09/41] mappings: change collections to keyword --- .../os-v2/deposits/records/videos/video/video-v1.0.0.json | 2 +- .../mappings/os-v2/records/videos/video/video-v1.0.0.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index 4c25f0a66..5faea8b65 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -433,7 +433,7 @@ } }, "collections": { - "type": "text" + "type": "keyword" }, "additional_languages": { "type": "text" diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index 9129b759b..31ee6b633 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -431,7 +431,7 @@ } }, "collections": { - "type": "text" + "type": "keyword" }, "additional_languages": { "type": "text" From 29a864dd5c61ef687170ed6e5152fae6065c4684 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Fri, 4 Jul 2025 16:23:49 +0200 Subject: [PATCH 10/41] schema: add digitized to curation --- .../records/videos/video/video-v1.0.0.json | 26 +++++++++++++++ .../records/videos/video/video-v1.0.0.json | 33 ++++++++++++++++++- .../records/videos/video/video-v1.0.0.json | 26 +++++++++++++++ .../records/videos/video/video-v1.0.0.json | 33 ++++++++++++++++++- .../records/serializers/schemas/common.py | 13 +++++++- 5 files changed, 128 insertions(+), 3 deletions(-) diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json 
b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index 5faea8b65..01969cb3c 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -368,6 +368,32 @@ "internal_note": { "type": "text" }, + "digitized": { + "type": "object", + "properties": { + "url": { + "type": "text" + }, + "format": { + "type": "text" + }, + "link_text": { + "type": "text" + }, + "public_note": { + "type": "text" + }, + "nonpublic_note": { + "type": "text" + }, + "md5_checksum": { + "type": "text" + }, + "source": { + "type": "text" + } + } + }, "legacy_marc_fields": { "type": "object", "properties": { diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index 975177f03..c35b48b02 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -627,7 +627,10 @@ "properties": { "legacy_report_number": { "title": "Legacy record report number.", - "type": "string" + "type": "array", + "items": { + "type": "string" + } }, "department": { "title": "CERN department.", @@ -661,6 +664,33 @@ "type": "string" } }, + "digitized": { + "title": "Digitized metadata.", + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } + } + }, "legacy_marc_fields": { "type": "object", "properties": { @@ -696,6 +726,7 @@ } }, "title": "Fields that needs curation.", + "description": "This section contains MARC21 metadata fields that could not be mapped during weblectures 
migration.", "type": "object" }, "additional_titles": { diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index 31ee6b633..c4c589fcf 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -366,6 +366,32 @@ "internal_note": { "type": "text" }, + "digitized": { + "type": "object", + "properties": { + "url": { + "type": "text" + }, + "format": { + "type": "text" + }, + "link_text": { + "type": "text" + }, + "public_note": { + "type": "text" + }, + "nonpublic_note": { + "type": "text" + }, + "md5_checksum": { + "type": "text" + }, + "source": { + "type": "text" + } + } + }, "legacy_marc_fields": { "type": "object", "properties": { diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index 2e47eb108..362e0e541 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -562,7 +562,10 @@ "properties": { "legacy_report_number": { "title": "Legacy record report number.", - "type": "string" + "type": "array", + "items": { + "type": "string" + } }, "department": { "title": "CERN department.", @@ -596,6 +599,33 @@ "type": "string" } }, + "digitized": { + "title": "Digitized metadata.", + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } + } + }, "legacy_marc_fields": { "type": "object", "properties": { @@ -631,6 +661,7 @@ } }, "title": "Fields that needs curation.", + "description": "This section 
contains MARC21 metadata fields that could not be mapped during weblectures migration.", "type": "object" }, "additional_titles": { diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py index cccc3ab32..e0e03634f 100644 --- a/cds/modules/records/serializers/schemas/common.py +++ b/cds/modules/records/serializers/schemas/common.py @@ -171,16 +171,27 @@ class LegacyMARCFieldsSchema(Schema): tag_306 = fields.List(fields.Str(), data_key="306") +class DigitizedMetadataSchema(Schema): + url = fields.Str() + format = fields.Str() + link_text = fields.Str() + public_note = fields.Str() + nonpublic_note = fields.Str() + md5_checksum = fields.Str() + source = fields.Str() + + class CurationSchema(StrictKeysSchema): """Curation schema.""" - legacy_report_number = fields.Str() + legacy_report_number = fields.List(fields.Str()) department = fields.Str() volumes = fields.List(fields.Str()) physical_location = fields.List(fields.Str()) physical_medium = fields.List(fields.Str()) internal_note = fields.List(fields.Str()) legacy_marc_fields = fields.Nested(LegacyMARCFieldsSchema) + digitized = fields.Nested(DigitizedMetadataSchema) class AdditionalTitlesSchema(Schema): From dbb10b3bee868c8e5abbd4fab3e4400527df2adf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Wed, 9 Jul 2025 13:17:15 +0200 Subject: [PATCH 11/41] homepage: display lectures --- cds/config.py | 39 +++++++++++++++ cds/modules/home/templates/cds_home/home.html | 49 ++++++++++++++++++- .../theme/assets/bootstrap3/js/cds/app.js | 3 ++ .../theme/assets/bootstrap3/scss/cds/cds.scss | 25 ++++++++++ .../templates/cds/video/small_video_card.html | 27 ++++++++++ 5 files changed, 142 insertions(+), 1 deletion(-) create mode 100644 cds/modules/theme/static/templates/cds/video/small_video_card.html diff --git a/cds/config.py b/cds/config.py index 770a5e5c3..a7d80a5eb 100644 --- a/cds/config.py +++ b/cds/config.py @@ -290,6 +290,7 @@ def 
_parse_env_bool(var_name, default=None): SEARCH_UI_VIDEO_MEDIUM = "templates/cds/video/featured-medium.html" # Angular template for small size (used for search results) SEARCH_UI_VIDEO_SMALL = "templates/cds/video/small.html" +HOMEPAGE_VIDEO_SMALL = "templates/cds/video/small_video_card.html" # Invenio-Stats # ============= @@ -999,6 +1000,44 @@ def _parse_env_bool(var_name, default=None): }, ] +HOMEPAGE_DYNAMIC_QUERIES = [ + { + "label": "LECTURES", + "query": "/api/records/?size=6&sort=mostrecent&q=collections:Lectures", + "qs": "collections=Lectures", + "subcategories": [ + { + "label": "Academic Training Lectures", + "qs": 'collections="Lectures::Academic Training Lectures"', + }, + { + "label": "CERN Accelerator School Lectures", + "qs": 'collections="Lectures::CERN Accelerator School"', + }, + { + "label": "E-Learning", + "qs": 'collections="Lectures::E-learning modules"', + }, + { + "label": "Conference records", + "qs": 'collections="Lectures::Talks, Seminars and Other Events,Conference records"', + }, + { + "label": "Scientific Seminars and Workshops", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Scientific Seminars and Workshops"', + }, + { + "label": "Teacher Programmes", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Teacher Programmes"', + }, + { + "label": "Student Lectures", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Student Lectures"', + } + ] + } +] + FRONTPAGE_TREND_TOPICS = [ { "label": "Antimatter", diff --git a/cds/modules/home/templates/cds_home/home.html b/cds/modules/home/templates/cds_home/home.html index 8be685422..a8141323f 100644 --- a/cds/modules/home/templates/cds_home/home.html +++ b/cds/modules/home/templates/cds_home/home.html @@ -78,8 +78,55 @@

    {{ _('TRENDS') }} + + {% for section in config.HOMEPAGE_DYNAMIC_QUERIES %} +
    +
    +
    +
    +

    + {{ _(section.label) }} +

    +
    + + + {{ _('Explore all') }} + +
    + + {% if section.subcategories %} +
    + +
    + {% endif %} + +
    + + + + + +
    +
    +
    + {% endfor %} + + + -
    +
    diff --git a/cds/modules/theme/assets/bootstrap3/js/cds/app.js b/cds/modules/theme/assets/bootstrap3/js/cds/app.js index ef018e020..6a8f98b7b 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds/app.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds/app.js @@ -131,4 +131,7 @@ angular.element(document).ready(function () { ["cds", "invenioSearch"], { strictDi: true } ); + document.querySelectorAll(".cds-dynamic-results").forEach((el) => { + angular.bootstrap(el, ["cds", "invenioSearch"], { strictDi: true }); + }); }); diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index f57f8f556..36f6a2806 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -314,6 +314,31 @@ a.cds-anchor:hover{ align-items: center; } +.cds-tags-custom { + display: flex; + flex-direction: column; + align-items: flex-start; + margin-top: 5px; + + li a { + max-width: 400px !important; + background-color: transparent !important; + border: 1px solid $cds-primary-color !important; + color: $cds-primary-color !important; + } +} + +.custom-flex-row { + display: flex; + justify-content: space-between; + align-items: center; +} + +.cds-video-title-text { + color: $cds-primary-color !important; + min-height: 3em; +} + ///////// // /search diff --git a/cds/modules/theme/static/templates/cds/video/small_video_card.html b/cds/modules/theme/static/templates/cds/video/small_video_card.html new file mode 100644 index 000000000..f5982b0f3 --- /dev/null +++ b/cds/modules/theme/static/templates/cds/video/small_video_card.html @@ -0,0 +1,27 @@ + From 4b6db83aaac4b27db189363a400612723a0ec223 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 10 Jul 2025 11:26:52 +0200 Subject: [PATCH 12/41] category: add Lectures --- cds/modules/fixtures/data/categories.json | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git 
a/cds/modules/fixtures/data/categories.json b/cds/modules/fixtures/data/categories.json index 015ea1951..5f7fd8eb9 100644 --- a/cds/modules/fixtures/data/categories.json +++ b/cds/modules/fixtures/data/categories.json @@ -95,5 +95,15 @@ "_access": { "read": ["alice-secretariat@cern.ch"] } + }, + { + "name": "LECTURES", + "types": ["VIDEO"], + "access": { + "public": true, + "restricted": [], + "responsible": ["weblecture-service@cern.ch"] + }, + "_record_type": ["PROJECT"] } ] From a7e5fb2d6c41e33cac18c19cbbb103c8d475679b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 10 Jul 2025 16:55:50 +0200 Subject: [PATCH 13/41] schema: fix _curation.digitized type --- .../records/videos/video/video-v1.0.0.json | 47 ++++++++++--------- .../records/videos/video/video-v1.0.0.json | 47 ++++++++++--------- 2 files changed, 50 insertions(+), 44 deletions(-) diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index c35b48b02..ae15d3d3c 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -666,28 +666,31 @@ }, "digitized": { "title": "Digitized metadata.", - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "format": { - "type": "string" - }, - "link_text": { - "type": "string" - }, - "public_note": { - "type": "string" - }, - "nonpublic_note": { - "type": "string" - }, - "md5_checksum": { - "type": "string" - }, - "source": { - "type": "string" + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } } } }, 
diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index 362e0e541..84ecfcde0 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -601,28 +601,31 @@ }, "digitized": { "title": "Digitized metadata.", - "type": "object", - "properties": { - "url": { - "type": "string" - }, - "format": { - "type": "string" - }, - "link_text": { - "type": "string" - }, - "public_note": { - "type": "string" - }, - "nonpublic_note": { - "type": "string" - }, - "md5_checksum": { - "type": "string" - }, - "source": { - "type": "string" + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } } } }, From 234a1d7c5f5839e62fc5263aa8fc0b3ff69e63df Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Tue, 15 Jul 2025 13:10:51 +0300 Subject: [PATCH 14/41] landing page: display related indico videos --- .../templates/cds_records/video/detail.html | 4 ++ .../video/related_indico_section.html | 39 +++++++++++++++++++ .../templates/cds_records/record_detail.html | 2 + .../theme/assets/bootstrap3/js/cds/module.js | 14 +++++++ .../assets/bootstrap3/js/cds_records/app.js | 2 + .../bootstrap3/js/cds_records/cdsRecord.js | 2 + .../theme/assets/bootstrap3/scss/cds/cds.scss | 4 ++ 7 files changed, 67 insertions(+) create mode 100644 cds/modules/records/static/templates/cds_records/video/related_indico_section.html diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index c253f1a43..6e2ef5315 100644 --- 
a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -202,6 +202,10 @@

    {{translation.title.title}}

    + +
    diff --git a/cds/modules/records/static/templates/cds_records/video/related_indico_section.html b/cds/modules/records/static/templates/cds_records/video/related_indico_section.html new file mode 100644 index 000000000..f7c46e012 --- /dev/null +++ b/cds/modules/records/static/templates/cds_records/video/related_indico_section.html @@ -0,0 +1,39 @@ +
    +
    + +
    +
    +
    +

    + More videos from this Indico event +

    + + Explore all + +
    +
    + + + + +
    +
    +
    diff --git a/cds/modules/records/templates/cds_records/record_detail.html b/cds/modules/records/templates/cds_records/record_detail.html index e1269dfc3..ab0efd30e 100644 --- a/cds/modules/records/templates/cds_records/record_detail.html +++ b/cds/modules/records/templates/cds_records/record_detail.html @@ -93,11 +93,13 @@ {% set user_action_media_download_url = '' %} {% endif %} {# TODO: remove and replace with jinja ? #} + {% set related_query_url = config.CDS_RECORDS_RELATED_QUERY or '' %}
    diff --git a/cds/modules/theme/assets/bootstrap3/js/cds/module.js b/cds/modules/theme/assets/bootstrap3/js/cds/module.js index 326b2a6f1..a414a4046 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds/module.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds/module.js @@ -751,3 +751,17 @@ app.filter("assembleShareURL", [ }; }, ]); + + +angular.module("cds").directive("bootstrapInvenioSearch", function () { + return { + restrict: "A", + link: function (scope, element) { + try { + angular.bootstrap(element[0], ["cds", "invenioSearch"], { strictDi: true }); + } catch (e) { + if (!/already bootstrapped/.test(e.message)) throw e; + } + }, + }; +}); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js index e71424057..31db43e50 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js @@ -26,6 +26,7 @@ import "./cdsRecord"; import "angular-sanitize"; import "angular-strap"; import "invenio-files-js/dist/invenio-files-js"; +import "invenio-search-js/dist/invenio-search-js"; import "ngmodal"; import "./user_actions_logger"; @@ -42,6 +43,7 @@ angular.element(document).ready(function () { "ngclipboard", "invenioFiles.filters", "ngSanitize", + "invenioSearch", ], { strictDi: true } ); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js index 5ded313f3..f5943e5e7 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js @@ -212,6 +212,8 @@ function cdsRecordView($http) { function link(scope, element, attrs, vm) { scope.mediaDownloadEventUrl = attrs.mediaDownloadEventUrl; + scope.relatedQueryUrl = attrs.relatedQueryUrl; + // Get the record object and make it available to the scope $http.get(attrs.record).then( function (response) { diff 
--git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index 36f6a2806..49dd82bb2 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -825,6 +825,10 @@ div[cds-search-results] { } } +.custom-video-style div[cds-search-results] .cds-video-title img { + height: 9em !important; +} + .cds-div-shadow { box-shadow: 0 1px 2px rgba(0,0,0,.1); } From 98f30cd96def230d262513bc40be37f02f1d9dea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Tue, 15 Jul 2025 12:43:49 +0300 Subject: [PATCH 15/41] homepage: add hover to subcategories --- cds/config.py | 4 ++-- cds/modules/home/templates/cds_home/home.html | 2 -- cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss | 6 +++++- .../theme/static/templates/cds/video/small_video_card.html | 2 +- 4 files changed, 8 insertions(+), 6 deletions(-) diff --git a/cds/config.py b/cds/config.py index a7d80a5eb..7de548a7d 100644 --- a/cds/config.py +++ b/cds/config.py @@ -1002,8 +1002,8 @@ def _parse_env_bool(var_name, default=None): HOMEPAGE_DYNAMIC_QUERIES = [ { - "label": "LECTURES", - "query": "/api/records/?size=6&sort=mostrecent&q=collections:Lectures", + "label": "LATEST LECTURES", + "query": "/api/records/?size=4&sort=mostrecent&q=collections:Lectures", "qs": "collections=Lectures", "subcategories": [ { diff --git a/cds/modules/home/templates/cds_home/home.html b/cds/modules/home/templates/cds_home/home.html index a8141323f..69f4382fc 100644 --- a/cds/modules/home/templates/cds_home/home.html +++ b/cds/modules/home/templates/cds_home/home.html @@ -83,11 +83,9 @@

    {{ _('TRENDS') }}
    -

    {{ _(section.label) }}

    -
    {{ _('Explore all') }} diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index 49dd82bb2..2bf0a6a90 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -325,7 +325,11 @@ a.cds-anchor:hover{ background-color: transparent !important; border: 1px solid $cds-primary-color !important; color: $cds-primary-color !important; - } + &:hover { + background-color:lighten($cds-primary-color, 10%) !important; + color: #fff !important; + } + } } .custom-flex-row { diff --git a/cds/modules/theme/static/templates/cds/video/small_video_card.html b/cds/modules/theme/static/templates/cds/video/small_video_card.html index f5982b0f3..b009c1d5f 100644 --- a/cds/modules/theme/static/templates/cds/video/small_video_card.html +++ b/cds/modules/theme/static/templates/cds/video/small_video_card.html @@ -1,5 +1,5 @@
    -
    Date: Thu, 17 Jul 2025 16:23:49 +0300 Subject: [PATCH 16/41] feature: follow and search transcriptions --- .../cds_deposit/types/video/uploader.html | 1 + .../cds_previewer/macros/player.html | 2 + .../templates/cds_records/video/detail.html | 71 +++++- .../cds_deposit/avc/components/cdsUploader.js | 18 +- .../bootstrap3/js/cds_records/cdsRecord.js | 210 +++++++++++++++++- .../theme/assets/bootstrap3/scss/cds/cds.scss | 16 ++ cds/modules/theme/webpack.py | 1 + 7 files changed, 312 insertions(+), 7 deletions(-) diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html index 24640444d..829343572 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html +++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html @@ -282,6 +282,7 @@
    Tips and suggestions
    ngf-model-options="{allowInvalid: false}" ngf-max-size="500GB" ngf-multiple="true" + ngf-validate-fn="$ctrl.validateAdditionalFiles($file)" >

    diff --git a/cds/modules/previewer/templates/cds_previewer/macros/player.html b/cds/modules/previewer/templates/cds_previewer/macros/player.html index 9ec2ac671..a7301dd58 100644 --- a/cds/modules/previewer/templates/cds_previewer/macros/player.html +++ b/cds/modules/previewer/templates/cds_previewer/macros/player.html @@ -51,6 +51,8 @@ {% endif %} initialRendition: 'first' }); + + window.top.player = player; // Preload player.source = { sources: [ diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index 6e2ef5315..1d1a386ee 100644 --- a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -64,7 +64,7 @@

    - +
    {{translation.language | isoToLanguage}}

    {{translation.title.title}}

    @@ -201,6 +201,24 @@

    {{translation.title.title}}

    + +
    +
    +
    +

    Transcriptions

    + Follow along or search within the transcript. +
    + +
    + +
    +
    +
    -
    + +
    +
    +

    + Transcription +

    +
    + +
    + +
    + + +
    +
    +
    +
    diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js index eaa4ae238..ce70be33d 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js @@ -314,9 +314,12 @@ function cdsUploaderCtrl( if (extraHeaders) { file.headers = extraHeaders; } - file.headers = { - "X-Invenio-File-Tags": "context_type=additional_file", - }; + + if (!extraHeaders || !("X-Invenio-File-Tags" in extraHeaders)) { + file.headers = { + "X-Invenio-File-Tags": "context_type=additional_file", + }; + } }); // Find if any of the existing files has been replaced @@ -545,6 +548,15 @@ function cdsUploaderCtrl( return match.length > 1 && match[1] in isoLanguages; }; + this.validateAdditionalFiles = function (_file) { + // If it's a .vtt file, validate as subtitle + if (_file.name.toLowerCase().endsWith(".vtt")) { + return this.validateSubtitles(_file); + } + // Accept other types + return true; + }; + this.updateFile = function (key, data, force) { var index = this.findFileIndex(that.files, key); if (index != -1) { diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js index f5943e5e7..5aafbd655 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js @@ -26,6 +26,7 @@ */ import angular from "angular"; +import { WebVTT } from "vtt.js"; import { getCookie } from "../getCookie"; @@ -38,7 +39,7 @@ import { getCookie } from "../getCookie"; * @description * CDS record controller. 
*/ -function cdsRecordController($scope, $sce, $http) { +function cdsRecordController($scope, $sce, $http, $timeout) { // Parameters // Assign the controller to `vm` @@ -53,11 +54,216 @@ function cdsRecordController($scope, $sce, $http) { // Record Warn - if the cdsRecord has any warning vm.cdsRecordWarning = null; + $scope.transcriptsByLanguage = {}; + $scope.transcript = []; + $scope.filteredTranscript = []; + $scope.selectedTranscriptLanguage = null; + const REQUEST_HEADERS = { "Content-Type": "application/json", "X-CSRFToken": getCookie("csrftoken"), }; + $scope.seekTo = function (timecode) { + const player = window.top.player; + if (player) { + player.currentTime = timecode; + if (player.paused) { + player.play().catch(function (err) { + console.warn("Autoplay might be blocked by the browser:", err); + }); + } + } else { + console.warn("Player not available"); + } + }; + + $scope.toggleTranscript = function () { + $scope.showTranscript = !$scope.showTranscript; + + // Jump to Transcriptions section + if ($scope.showTranscript) { + setTimeout(function () { + const el = document.getElementById("transcriptionsSection"); + if (el) { + const rect = el.getBoundingClientRect(); + const isVisible = + rect.top >= 0 && + rect.bottom <= + (window.innerHeight || document.documentElement.clientHeight); + + if (!isVisible) { + const topOffset = rect.top + window.scrollY - 60; + window.scrollTo({ top: topOffset, behavior: "smooth" }); + } + } + }, 100); + } + }; + + $scope.parseVttFromUrl = function (url, type, lang) { + fetch(url) + .then((res) => res.text()) + .then(function (vttText) { + const parser = new WebVTT.Parser(window, WebVTT.StringDecoder()); + const cues = {}; + + parser.oncue = function (cue) { + cues[cue.text] = { + start: cue.startTime, + end: cue.endTime, + text: cue.text, + }; + }; + + parser.parse(vttText); + parser.flush(); + + $timeout(function () { + if (type === "transcript") { + $scope.transcriptsByLanguage[lang] = cues; + + // Use the first one that 
loads + if (!$scope.selectedTranscriptLanguage) { + $scope.transcript = cues; + $scope.filterTranscript(); + $scope.selectedTranscriptLanguage = lang; + } + } else { + console.warn("Unknown type for VTT parsing:", type); + } + }); + }) + .catch(function (err) { + console.error("VTT parsing failed", err); + }); + }; + + $scope.transcriptSearch = ""; + + $scope.filterTranscript = function () { + var searchTerm = $scope.transcriptSearch.toLowerCase(); + $scope.filteredTranscript = Object.values($scope.transcript).filter( + function (line) { + return ( + !searchTerm || + (line.text && line.text.toLowerCase().indexOf(searchTerm) !== -1) + ); + } + ); + }; + + $scope.$watch("transcript", function (newVal) { + if (newVal) $scope.filterTranscript(); + }); + + $scope.$watch("record", function (newVal) { + if (newVal) { + $scope.initVttLoad(newVal); + } + }); + + $scope.initVttLoad = function (record) { + console.log("Initializing VTT load for record:", record); + const files = record.metadata._files || []; + + // Subtitles (transcripts) + const transcriptVttFiles = files.filter( + (f) => f.context_type === "subtitle" && f.content_type === "vtt" + ); + + // Step 2: If found, load it + transcriptVttFiles.forEach((file) => { + const lang = file.tags.language || "unknown"; + if (file.links?.self) { + $scope.parseVttFromUrl(file.links.self, "transcript", lang); + } else { + console.warn("No subtitle file found."); + } + }); + }; + + $scope.setTranscriptLanguage = function (lang) { + if ($scope.transcriptsByLanguage[lang]) { + $scope.transcript = $scope.transcriptsByLanguage[lang]; + $scope.filterTranscript(); + } else { + console.warn("Transcript not found for language:", lang); + } + }; + + // Follow transcriptions + $scope.currentTranscriptLine = null; + function getScrollableParent(el) { + while (el && el !== document.body) { + const style = window.getComputedStyle(el); + const overflowY = style.overflowY; + if (overflowY === "auto" || overflowY === "scroll") { + return el; + 
} + el = el.parentElement; + } + return null; + } + function updateTranscriptHighlight() { + const player = window.top.player; + if (!player || !$scope.transcript) return; + + const currentTime = player.currentTime; + const lines = Object.values($scope.transcript); + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (currentTime >= line.start && currentTime <= line.end) { + if ($scope.currentTranscriptLine !== line) { + $scope.currentTranscriptLine = line; + $scope.$applyAsync(); // Trigger Angular update + + // Auto-scroll to the active line + setTimeout(() => { + const el = document.querySelector(".transcript-line.active"); + const container = getScrollableParent(el); + + if (el && container) { + const elRect = el.getBoundingClientRect(); + const containerRect = container.getBoundingClientRect(); + + const currentScroll = container.scrollTop; + const topOffset = elRect.top - containerRect.top; + + const targetScroll = currentScroll + topOffset - 10; + + container.scrollTo({ + top: targetScroll, + behavior: "smooth", + }); + } + }, 50); + } + return; + } + } + + $scope.currentTranscriptLine = null; + $scope.$applyAsync(); + } + + let transcriptTimer = setInterval(updateTranscriptHighlight, 100); + + $scope.$on("$destroy", function () { + clearInterval(transcriptTimer); + }); + + $scope.convertToMinutesSeconds = function (seconds) { + const minutes = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + + // Pad with zero if needed + const paddedSecs = secs < 10 ? 
"0" + secs : secs; + + return `${minutes}:${paddedSecs}`; + }; + /** * Trust iframe url * @memberof cdsRecordController @@ -180,7 +386,7 @@ function cdsRecordController($scope, $sce, $http) { $scope.$on("cds.record.loading.stop", cdsRecordLoadingStop); } -cdsRecordController.$inject = ["$scope", "$sce", "$http"]; +cdsRecordController.$inject = ["$scope", "$sce", "$http", "$timeout"]; //////////// diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index 2bf0a6a90..ef682426e 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -1116,3 +1116,19 @@ div[cds-search-results] { } } } + +.transcript-line:hover, +.transcript-line.active { + background-color: $gray-lighter; +} + +.transcription-button { + border-radius: 9px !important; + border: 1px solid $cds-primary-color !important; + color: $cds-primary-color !important; +} + +.transcription-button:hover { + background-color: $cds-primary-color !important; + color: #fff !important; +} \ No newline at end of file diff --git a/cds/modules/theme/webpack.py b/cds/modules/theme/webpack.py index 420a08ca9..a4f232d33 100644 --- a/cds/modules/theme/webpack.py +++ b/cds/modules/theme/webpack.py @@ -98,6 +98,7 @@ "rr-ng-ckeditor": "~0.2.1", # needed because ci fails on tests otherwise. 
not imported in any bundle "semantic-ui-less": "^2.4.1", + "vtt.js": "~0.13.0", }, aliases={ "@js/cds": "js/cds", From 08c6254fd780c2ee29ddbe7f556801ea457e9842 Mon Sep 17 00:00:00 2001 From: zzacharo Date: Fri, 18 Jul 2025 16:37:50 +0200 Subject: [PATCH 17/41] transcriptions: move them side-by-side to player and add hide button --- cds/config.py | 18 ++- .../templates/cds_records/video/detail.html | 125 ++++++++++-------- .../theme/assets/bootstrap3/scss/cds/cds.scss | 5 + 3 files changed, 83 insertions(+), 65 deletions(-) diff --git a/cds/config.py b/cds/config.py index 7de548a7d..66b337d84 100644 --- a/cds/config.py +++ b/cds/config.py @@ -1003,9 +1003,9 @@ def _parse_env_bool(var_name, default=None): HOMEPAGE_DYNAMIC_QUERIES = [ { "label": "LATEST LECTURES", - "query": "/api/records/?size=4&sort=mostrecent&q=collections:Lectures", + "query": "/api/records/?size=4&sort=mostrecent&q=collections:Lectures", "qs": "collections=Lectures", - "subcategories": [ + "subcategories": [ { "label": "Academic Training Lectures", "qs": 'collections="Lectures::Academic Training Lectures"', @@ -1017,15 +1017,15 @@ def _parse_env_bool(var_name, default=None): { "label": "E-Learning", "qs": 'collections="Lectures::E-learning modules"', - }, + }, { "label": "Conference records", "qs": 'collections="Lectures::Talks, Seminars and Other Events,Conference records"', - }, + }, { "label": "Scientific Seminars and Workshops", "qs": 'collections="Lectures::Talks, Seminars and Other Events::Scientific Seminars and Workshops"', - }, + }, { "label": "Teacher Programmes", "qs": 'collections="Lectures::Talks, Seminars and Other Events::Teacher Programmes"', @@ -1033,8 +1033,8 @@ def _parse_env_bool(var_name, default=None): { "label": "Student Lectures", "qs": 'collections="Lectures::Talks, Seminars and Other Events::Student Lectures"', - } - ] + }, + ], } ] @@ -1297,6 +1297,7 @@ def _parse_env_bool(var_name, default=None): # Licence key and base URL for THEO player THEOPLAYER_LIBRARY_LOCATION = 
None THEOPLAYER_LICENSE = None + # Wowza server URL for m3u8 playlist generation WOWZA_PLAYLIST_URL = ( "https://wowza.cern.ch/cds/_definist_/smil:" "{filepath}/playlist.m3u8" @@ -1672,3 +1673,6 @@ def _parse_env_bool(var_name, default=None): # Sets the location to share the video files among the different tasks CDS_FILES_TMP_FOLDER = "/tmp/videos" + +# TODO: needs latest files-rest enabling range requests +FILES_REST_ALLOW_RANGE_REQUESTS = True diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index 1d1a386ee..c64ff3786 100644 --- a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -4,16 +4,74 @@
    -
    - +
    +
    +
    +
    + +
    +
    + +
    +
    +
    +

    + Transcription +
    +

    +
    + +
    + +
    + + +
    +
    +
    +
    +
    @@ -181,6 +239,7 @@

    {{translation.title.title}}

    +
    @@ -237,56 +296,6 @@

    - -
    -
    -

    - Transcription -

    -
    - -
    - -
    - - -
    -
    -
    -
    diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index ef682426e..1ccd6e942 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -1131,4 +1131,9 @@ div[cds-search-results] { .transcription-button:hover { background-color: $cds-primary-color !important; color: #fff !important; +} + +.transcription-close +.transcription-close:hover { + cursor: pointer; } \ No newline at end of file From 3241f6ea20e3ca0e5ad9b18b9fc3031601e1a222 Mon Sep 17 00:00:00 2001 From: zzacharo Date: Thu, 24 Jul 2025 16:08:04 +0200 Subject: [PATCH 18/41] templates: rename related_indico_section to related_event_section and remove Indico from section label --- .../records/static/templates/cds_records/video/detail.html | 2 +- .../{related_indico_section.html => related_event_section.html} | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) rename cds/modules/records/static/templates/cds_records/video/{related_indico_section.html => related_event_section.html} (96%) diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index c64ff3786..a8444c7e1 100644 --- a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -281,7 +281,7 @@

    {{translation.title.title}}

    + src="'/static/templates/cds_records/video/related_event_section.html'">
    diff --git a/cds/modules/records/static/templates/cds_records/video/related_indico_section.html b/cds/modules/records/static/templates/cds_records/video/related_event_section.html similarity index 96% rename from cds/modules/records/static/templates/cds_records/video/related_indico_section.html rename to cds/modules/records/static/templates/cds_records/video/related_event_section.html index f7c46e012..606726d26 100644 --- a/cds/modules/records/static/templates/cds_records/video/related_indico_section.html +++ b/cds/modules/records/static/templates/cds_records/video/related_event_section.html @@ -15,7 +15,7 @@

    - More videos from this Indico event + More videos from this event

    Date: Fri, 25 Jul 2025 16:41:25 +0200 Subject: [PATCH 19/41] detail: fix transcript filtering --- .../bootstrap3/js/cds_records/cdsRecord.js | 25 ++++++++----------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js index 5aafbd655..8ec43af26 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js @@ -58,6 +58,7 @@ function cdsRecordController($scope, $sce, $http, $timeout) { $scope.transcript = []; $scope.filteredTranscript = []; $scope.selectedTranscriptLanguage = null; + $scope.transcriptSearch = ""; const REQUEST_HEADERS = { "Content-Type": "application/json", @@ -139,18 +140,15 @@ function cdsRecordController($scope, $sce, $http, $timeout) { }); }; - $scope.transcriptSearch = ""; - $scope.filterTranscript = function () { - var searchTerm = $scope.transcriptSearch.toLowerCase(); - $scope.filteredTranscript = Object.values($scope.transcript).filter( - function (line) { - return ( - !searchTerm || - (line.text && line.text.toLowerCase().indexOf(searchTerm) !== -1) - ); - } - ); + var searchTerm = this.transcriptSearch.toLowerCase(); + $scope.filteredTranscript = Object.values($scope.transcript).filter(function ( + line + ) { + return ( + !searchTerm || (line.text && line.text.toLowerCase().indexOf(searchTerm) !== -1) + ); + }); }; $scope.$watch("transcript", function (newVal) { @@ -164,7 +162,6 @@ function cdsRecordController($scope, $sce, $http, $timeout) { }); $scope.initVttLoad = function (record) { - console.log("Initializing VTT load for record:", record); const files = record.metadata._files || []; // Subtitles (transcripts) @@ -473,9 +470,7 @@ cdsRecordView.$inject = ["$http"]; // Setup everything -angular - .module("cdsRecord.directives", []) - .directive("cdsRecordView", cdsRecordView); 
+angular.module("cdsRecord.directives", []).directive("cdsRecordView", cdsRecordView); angular .module("cdsRecord.controllers", []) From 262c8c9bd324ed740738085307cedfdb1c701ca6 Mon Sep 17 00:00:00 2001 From: zzacharo Date: Fri, 25 Jul 2025 17:07:54 +0200 Subject: [PATCH 20/41] detail: fix toggle transcript css --- cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss | 1 - 1 file changed, 1 deletion(-) diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index 1ccd6e942..c923bc8f4 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -1133,7 +1133,6 @@ div[cds-search-results] { color: #fff !important; } -.transcription-close .transcription-close:hover { cursor: pointer; } \ No newline at end of file From b9596f8ea3517b16491ad90f4636d65b6fc72110 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Fri, 15 Aug 2025 14:41:02 +0200 Subject: [PATCH 21/41] landing page: remove thumbnails download --- .../cds_records/video/downloads.html | 30 ------------------- 1 file changed, 30 deletions(-) diff --git a/cds/modules/records/static/templates/cds_records/video/downloads.html b/cds/modules/records/static/templates/cds_records/video/downloads.html index aa6e350a8..26ddb78e1 100644 --- a/cds/modules/records/static/templates/cds_records/video/downloads.html +++ b/cds/modules/records/static/templates/cds_records/video/downloads.html @@ -86,36 +86,6 @@

    - - - -
    From d2c68e070205a6a92666a5fefae044a01249a97a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 14 Aug 2025 15:13:28 +0200 Subject: [PATCH 22/41] form: change related links to related identifiers --- .../static/json/cds_deposit/forms/video.json | 138 +++++++++++++++++- .../cds_deposit/types/video/form.html | 2 +- 2 files changed, 133 insertions(+), 7 deletions(-) diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/video.json b/cds/modules/deposit/static/json/cds_deposit/forms/video.json index 981de0f3e..1d5448dd2 100644 --- a/cds/modules/deposit/static/json/cds_deposit/forms/video.json +++ b/cds/modules/deposit/static/json/cds_deposit/forms/video.json @@ -567,17 +567,143 @@ ], "related_links": [ { - "key": "related_links", + "key": "related_identifiers", "type": "array", - "add": "Add related links", + "add": "Add related identifiers", + "title": "Related Identifiers", + "description": "Add identifiers for related resources such as DOIs, URLs, or Indico event IDs.", "items": [ { - "title": "Name", - "key": "related_links[].name" + "title": "Identifier", + "key": "related_identifiers[].identifier", + "required": true, + "placeholder": "e.g., 10.1234/example.doi, https://example.com, 12345", + "description": "The identifier value (DOI, URL, or Indico event ID)" }, { - "title": "URL", - "key": "related_links[].url" + "title": "Scheme", + "key": "related_identifiers[].scheme", + "type": "select", + "required": true, + "placeholder": "Select identifier scheme", + "description": "The type of identifier scheme", + "titleMap": [ + { + "value": "URL", + "name": "URL (Uniform Resource Locator)" + }, + { + "value": "DOI", + "name": "DOI (Digital Object Identifier)" + }, + { + "value": "Indico", + "name": "Indico (Event ID)" + }, + { + "value": "CDS", + "name": "CDS (CERN Document Server Record ID)" + } + ] + }, + { + "title": "Relation Type", + "key": "related_identifiers[].relation_type", + "type": "select", + "required": true, + 
"placeholder": "Select relation type", + "description": "How this resource relates to the identified resource", + "titleMap": [ + { + "value": "IsPartOf", + "name": "Is part of" + }, + { + "value": "IsVariantFormOf", + "name": "Is variant form of" + } + ] + }, + { + "title": "Resource Type", + "key": "related_identifiers[].resource_type", + "type": "select", + "placeholder": "Select resource type (optional)", + "description": "The type of the related resource (optional)", + "titleMap": [ + { + "value": "Audiovisual", + "name": "Audiovisual" + }, + { + "value": "Book", + "name": "Book" + }, + { + "value": "Collection", + "name": "Collection" + }, + { + "value": "ConferencePaper", + "name": "Conference Paper" + }, + { + "value": "DataPaper", + "name": "Data Paper" + }, + { + "value": "Dataset", + "name": "Dataset" + }, + { + "value": "Event", + "name": "Event" + }, + { + "value": "Image", + "name": "Image" + }, + { + "value": "InteractiveResource", + "name": "Interactive Resource" + }, + { + "value": "Model", + "name": "Model" + }, + { + "value": "PhysicalObject", + "name": "Physical Object" + }, + { + "value": "Report", + "name": "Report" + }, + { + "value": "Service", + "name": "Service" + }, + { + "value": "Software", + "name": "Software" + }, + { + "value": "Sound", + "name": "Sound" + }, + { + "value": "Text", + "name": "Text" + }, + { + "value": "Workflow", + "name": "Workflow" + }, + { + "value": "Other", + "name": "Other" + } + ] } ] } diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html index 3e3fe027c..bd606dfa7 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html +++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html @@ -206,7 +206,7 @@

  • - Related links + Related information
  • From 6252774faf3ef2d0a62ae1f4bbebe3663c0f1c2e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Thu, 14 Aug 2025 14:18:39 +0200 Subject: [PATCH 23/41] tests: fix failing tests --- .github/workflows/tests.yml | 8 ++++++-- tests/unit/test_fixtures.py | 2 +- tests/unit/test_schema_datacite.py | 1 + 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 72dad9104..96e97e038 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,9 +9,13 @@ name: CI on: push: - branches: main + branches: + - main pull_request: - branches: main + branches: + - main + - additional-files + schedule: # * is a special character in YAML so you have to quote this string - cron: "0 4 * * 6" diff --git a/tests/unit/test_fixtures.py b/tests/unit/test_fixtures.py index 565702dbf..83d7d81cf 100644 --- a/tests/unit/test_fixtures.py +++ b/tests/unit/test_fixtures.py @@ -91,7 +91,7 @@ def test_fixture_categories(app, script_info, db, es, location): res = runner.invoke(cli_categories, [], obj=script_info) assert res.exit_code == 0 categories = RecordMetadata.query.all() - assert len(categories) == 7 + assert len(categories) == 8 for category in categories: assert "VIDEO" in category.json["types"] diff --git a/tests/unit/test_schema_datacite.py b/tests/unit/test_schema_datacite.py index 60db92536..9b5228624 100644 --- a/tests/unit/test_schema_datacite.py +++ b/tests/unit/test_schema_datacite.py @@ -43,6 +43,7 @@ def test_video_metadata_tranform(app, video_record_metadata, recid_pid): {"creatorName": "pluto"}, {"creatorName": "zio paperino"}, ], + "contributors": [], "dates": [{"date": "2017-03-02", "dateType": "Issued"}], "descriptions": [ { From 966cc9212d654b24463bb3cc5d2f90213ef93045 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Z=C3=BCbeyde=20Civelek?= Date: Wed, 13 Aug 2025 11:31:42 +0200 Subject: [PATCH 24/41] feature: add chapters and task for chapter frames --- 
cds/modules/deposit/api.py | 78 ++++- cds/modules/deposit/receivers.py | 2 + cds/modules/flows/api.py | 2 + cds/modules/flows/tasks.py | 264 ++++++++++++++++- .../cds_previewer/macros/player.html | 166 ++++++++--- .../records/serializers/schemas/video.py | 15 +- .../templates/cds_records/video/detail.html | 199 +++++++++---- cds/modules/records/utils.py | 74 +++++ .../bootstrap3/js/cds_records/cdsRecord.js | 247 ++++++++++++++-- .../theme/assets/bootstrap3/scss/cds/cds.scss | 269 ++++++++++++++++++ tests/unit/test_flows_tasks.py | 88 ++++++ 11 files changed, 1284 insertions(+), 120 deletions(-) diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py index 5cc9bed37..b7ae89048 100644 --- a/cds/modules/deposit/api.py +++ b/cds/modules/deposit/api.py @@ -65,6 +65,7 @@ get_tasks_status_grouped_by_task_name, merge_tasks_status, ) +from ..flows.tasks import ExtractChapterFramesTask from ..flows.models import FlowMetadata from ..invenio_deposit.api import Deposit, has_status, preserve from ..invenio_deposit.utils import mark_as_action @@ -76,7 +77,7 @@ ) from ..records.minters import cds_doi_generator, is_local_doi, report_number_minter from ..records.resolver import record_resolver -from ..records.utils import is_record, lowercase_value +from ..records.utils import is_record, lowercase_value, parse_video_chapters from ..records.validators import PartialDraft4Validator from ..records.permissions import is_public from .errors import DiscardConflict @@ -504,7 +505,7 @@ def create(cls, data, id_=None, **kwargs): data.setdefault("_access", {}) access_update = data["_access"].setdefault("update", []) try: - if current_user.email not in access_update: + if current_user.email not in access_update: # Add the current user to the ``_access.update`` list access_update.append(current_user.email) except AttributeError: @@ -905,11 +906,74 @@ def _publish_edited(self): return super(Video, self)._publish_edited() + def _has_chapters_changed(self, old_record=None): + """Check 
if chapters in description have changed.""" + current_description = self.get("description", "") + current_chapters = parse_video_chapters(current_description) + + if old_record is None: + # First publish - trigger if chapters exist + return len(current_chapters) > 0 + + old_description = old_record.get("description", "") + old_chapters = parse_video_chapters(old_description) + + # Compare chapter timestamps and titles + if len(current_chapters) != len(old_chapters): + return True + + for curr, old in zip(current_chapters, old_chapters): + if curr["seconds"] != old["seconds"] or curr["title"] != old["title"]: + return True + + return False + + def _trigger_chapter_frame_extraction(self): + """Trigger chapter frame extraction asynchronously for existing video files.""" + try: + # Get the current flow for this deposit + current_flow = FlowMetadata.get_by_deposit(self["_deposit"]["id"]) + + if current_flow is None: + current_app.logger.warning( + f"No current flow found for video {self.id}. Cannot trigger chapter frame extraction." 
+ ) + return + + current_app.logger.info( + f"Triggering asynchronous ExtractChapterFramesTask for video {self.id} with flow {current_flow.id}" + ) + + payload = current_flow.payload.copy() + + current_app.logger.info(f"Submitting ExtractChapterFramesTask with payload: {payload}") + + ExtractChapterFramesTask().s(**payload).apply_async() + + current_app.logger.info( + f"ExtractChapterFramesTask submitted asynchronously for video {self.id}, flow_id: {current_flow.id}" + ) + except Exception as e: + current_app.logger.error( + f"Failed to trigger async chapter frame extraction for video {self.id}: {e}" + ) + import traceback + + current_app.logger.error(f"Traceback: {traceback.format_exc()}") + @mark_as_action def publish(self, pid=None, id_=None, **kwargs): """Publish a video and update the related project.""" # save a copy of the old PID video_old_id = self["_deposit"]["id"] + + # Check if this is a republish and get the old record + old_record = None + try: + _, old_record = self.fetch_published() + except KeyError as e: # First publish (no pid key) + pass + try: self["category"] = self.project["category"] self["type"] = self.project["type"] @@ -930,6 +994,13 @@ def publish(self, pid=None, id_=None, **kwargs): video_published = super(Video, self).publish(pid=pid, id_=id_, **kwargs) _, record_new = self.fetch_published() + # Check if chapters have changed and trigger frame extraction + if self._has_chapters_changed(old_record): + current_app.logger.info( + f"Chapters changed for video {self.id}, triggering frame extraction" + ) + self._trigger_chapter_frame_extraction() + # update associated project video_published.project._update_videos( [video_build_url(video_old_id)], @@ -1088,7 +1159,6 @@ def _create_tags(self): except IndexError: return - def mint_doi(self): """Mint DOI.""" assert self.has_record() @@ -1109,7 +1179,7 @@ def mint_doi(self): status=PIDStatus.RESERVED, ) return self - + project_resolver = Resolver( pid_type="depid", diff --git 
a/cds/modules/deposit/receivers.py b/cds/modules/deposit/receivers.py index 18727fec7..d5949a39d 100644 --- a/cds/modules/deposit/receivers.py +++ b/cds/modules/deposit/receivers.py @@ -33,6 +33,7 @@ from cds.modules.flows.tasks import ( DownloadTask, ExtractFramesTask, + ExtractChapterFramesTask, ExtractMetadataTask, TranscodeVideoTask, ) @@ -87,4 +88,5 @@ def register_celery_class_based_tasks(sender, app=None): celery.register_task(ExtractMetadataTask()) celery.register_task(DownloadTask()) celery.register_task(ExtractFramesTask()) + celery.register_task(ExtractChapterFramesTask()) celery.register_task(TranscodeVideoTask()) diff --git a/cds/modules/flows/api.py b/cds/modules/flows/api.py index 136bef012..09acbaadf 100644 --- a/cds/modules/flows/api.py +++ b/cds/modules/flows/api.py @@ -39,6 +39,7 @@ from .tasks import ( CeleryTask, DownloadTask, + ExtractChapterFramesTask, ExtractFramesTask, ExtractMetadataTask, TranscodeVideoTask, @@ -245,6 +246,7 @@ def _find_celery_task_by_name(name): ExtractMetadataTask, ExtractFramesTask, TranscodeVideoTask, + ExtractChapterFramesTask, ]: if celery_task.name == name: return celery_task diff --git a/cds/modules/flows/tasks.py b/cds/modules/flows/tasks.py index eaa97952a..8dc0068d5 100644 --- a/cds/modules/flows/tasks.py +++ b/cds/modules/flows/tasks.py @@ -62,7 +62,7 @@ from ..opencast.api import OpenCast from ..opencast.error import RequestError from ..opencast.utils import get_qualities -from ..records.utils import to_string +from ..records.utils import to_string, parse_video_chapters from ..xrootd.utils import file_opener_xrootd from .deposit import index_deposit_project from .files import dispose_object_version, move_file_into_local @@ -197,7 +197,9 @@ def _meta_exception_envelope(self, exc): NOTE: workaround to be able to save the payload in celery in case of exceptions. 
""" - meta = dict(message=str(exc), payload=self._base_payload) + # Safety check in case base payload is not set yet + payload = getattr(self, '_base_payload', {}) + meta = dict(message=str(exc), payload=payload) return dict(exc_message=meta, exc_type=exc.__class__.__name__) def on_failure(self, exc, task_id, args, kwargs, einfo): @@ -223,7 +225,16 @@ def on_success(self, exc, task_id, args, kwargs): def _reindex_video_project(self): """Reindex video and project.""" with celery_app.flask_app.app_context(): - deposit_id = self._base_payload["deposit_id"] + # Safety check in case base payload is not set yet + if not hasattr(self, '_base_payload') or not self._base_payload or 'deposit_id' not in self._base_payload: + if hasattr(self, 'deposit_id') and self.deposit_id: + deposit_id = self.deposit_id + else: + self.log("Cannot reindex: deposit_id not available") + return + else: + deposit_id = self._base_payload["deposit_id"] + try: index_deposit_project(deposit_id) except PIDDeletedError: @@ -590,10 +601,10 @@ def progress_updater(current_frame): object_=self.object_version, output_dir=output_folder, progress_updater=progress_updater, - **options + **options, ), object_=self.object_version, - **options + **options, ) except Exception: db.session.rollback() @@ -601,6 +612,8 @@ def progress_updater(current_frame): self.clean(version_id=self.object_version_id) raise + total_frames = len(frames) + # Generate GIF images self._create_gif( bucket=str(self.object_version.bucket.id), @@ -618,7 +631,7 @@ def progress_updater(current_frame): db.session.commit() self.log("Finished task {0}".format(kwargs["task_id"])) - return "Created {0} frames.".format(len(frames)) + return "Created {0} frames.".format(total_frames) @classmethod def _time_position(cls, duration, frames_start=5, frames_end=95, frames_gap=10): @@ -648,7 +661,7 @@ def _create_tmp_frames( duration, output_dir, progress_updater=None, - **kwargs + **kwargs, ): """Create frames in temporary files.""" # Generate frames 
class ExtractChapterFramesTask(AVCTask):
    """Extract one frame per chapter found in the video description.

    Dedicated to chapter frames only: the frames it creates are tagged with
    ``is_chapter_frame`` so that they can be cleaned up independently of the
    other frames attached to the same master video.
    """

    name = "file_video_extract_chapter_frames"

    # A chapter at 0:00 is extracted slightly after the start of the video to
    # avoid extraction issues at the very first instant.
    _ZERO_CHAPTER_OFFSET = 0.1
    # Two timestamps (in seconds) closer than this count as the same frame.
    _TIMESTAMP_TOLERANCE = 0.1
    # Length (in seconds) of the window handed to ``ff_frames`` so that just
    # one frame is extracted.
    _SINGLE_FRAME_WINDOW = 0.01

    @staticmethod
    def clean(version_id, *args, **kwargs):
        """Delete the chapter frames generated for a master video.

        Only ``ObjectVersion`` slaves tagged with ``master == version_id``,
        ``context_type == "frame"`` and ``is_chapter_frame == "true"`` are
        disposed of.

        :param version_id: version id of the master ``ObjectVersion``.
        """
        # One alias per tag so that all three conditions must hold on the
        # same object version.
        master_tag = aliased(ObjectVersionTag)
        context_tag = aliased(ObjectVersionTag)
        chapter_tag = aliased(ObjectVersionTag)

        slaves = (
            ObjectVersion.query.join(master_tag, ObjectVersion.tags)
            .join(context_tag, ObjectVersion.tags)
            .join(chapter_tag, ObjectVersion.tags)
            .filter(master_tag.key == "master", master_tag.value == version_id)
            .filter(context_tag.key == "context_type", context_tag.value == "frame")
            .filter(chapter_tag.key == "is_chapter_frame", chapter_tag.value == "true")
            .all()
        )

        for slave in slaves:
            dispose_object_version(slave)

    def run(self, *args, **kwargs):
        """Extract frames at the chapter timestamps of the video description.

        The task relies on the attributes set up by the standard ``AVCTask``
        initialisation:

        - ``self.deposit_id``: deposit holding the video description;
        - ``self.object_version``: ``ObjectVersion`` of the master video file;
        - ``self.flow_id``: current flow, used for the task metadata.

        :returns: ``{"chapter_frames_extracted": 0, "status": "no_chapters"}``
            when the description contains no chapter, otherwise the message
            ``"Created N chapter frames."``.
        :raises ValueError: if chapters were found but the video duration
            stored in the task payload is 0.
        """
        # Kept as a function-level import, as in the original code
        # (presumably to avoid a circular import - TODO confirm).
        from cds.modules.deposit.api import deposit_video_resolver

        # Create or update the TaskMetadata
        flow_task_metadata = self.get_or_create_flow_task()
        kwargs["celery_task_id"] = str(self.request.id)
        kwargs["task_id"] = str(flow_task_metadata.id)
        flow_task_metadata.payload = self.get_full_payload(**kwargs)
        flow_task_metadata.status = FlowTaskStatus.STARTED
        flow_task_metadata.message = ""
        db.session.commit()

        self.log("Started task {0}".format(kwargs["task_id"]))

        bucket_was_locked = False
        if self.object_version.bucket.locked:
            # If record was published we need to unlock the bucket
            bucket_was_locked = True
            self.object_version.bucket.locked = False

        output_folder = tempfile.mkdtemp()
        chapters = []
        frames = []
        try:
            db.session.refresh(self.object_version)
            deposit_video = deposit_video_resolver(self.deposit_id)
            # ``or ""`` also covers a description explicitly stored as None.
            description = deposit_video.get("description") or ""

            self.log(
                "Found description with {0} characters".format(len(description))
            )

            # Parse chapters from description
            chapters = parse_video_chapters(description)

            if chapters:
                self.log("Found {0} chapters in description".format(len(chapters)))

                # Get video duration from metadata
                tags = self._base_payload.get("tags", {})
                duration = float(tags.get("duration") or 0)
                if duration == 0:
                    raise ValueError("Video duration is 0 - cannot extract frames")

                def progress_updater(current_chapter):
                    """Log the extraction progress."""
                    self.log(
                        "Extracting chapter frames [{0} out of {1}]".format(
                            current_chapter, len(chapters)
                        )
                    )

                frames = self._create_chapter_frames(
                    chapters=chapters,
                    duration=duration,
                    object_=self.object_version,
                    output_dir=output_folder,
                    # Timestamps that already have a frame are skipped.
                    existing_timestamps=self._get_existing_frame_timestamps(),
                    progress_updater=progress_updater,
                )

                # Sync deposit and record files
                sync_records_with_deposit_files(self.deposit_id)
            else:
                self.log("No chapters found in description - task completed")
        except Exception:
            db.session.rollback()
            self.clean(version_id=self.object_version_id)
            raise
        finally:
            # The temporary folder is removed on every exit path, including
            # the "no chapters" one.
            shutil.rmtree(output_folder, ignore_errors=True)

        if bucket_was_locked:
            # Lock the bucket again; this also happens when no chapter was
            # found, so that a published record never stays unlocked.
            self.object_version.bucket.locked = True
        # Commit explicitly, as the frame extraction task's run() does, so
        # that the restored lock and the new frames do not depend on a commit
        # issued somewhere else.
        db.session.commit()

        if not chapters:
            return {"chapter_frames_extracted": 0, "status": "no_chapters"}

        self.log("Finished task {0}".format(kwargs["task_id"]))
        return "Created {0} chapter frames.".format(len(frames))

    def _get_existing_frame_timestamps(self):
        """Return the timestamps of the frames already attached to the master.

        :returns: set of floats, one per ``timestamp`` tag value that can be
            converted to a float; other values are ignored.
        """
        master_tag = aliased(ObjectVersionTag)
        context_tag = aliased(ObjectVersionTag)
        timestamp_tag = aliased(ObjectVersionTag)

        existing = (
            ObjectVersion.query.join(master_tag, ObjectVersion.tags)
            .join(context_tag, ObjectVersion.tags)
            .join(timestamp_tag, ObjectVersion.tags)
            .filter(
                master_tag.key == "master",
                master_tag.value == self.object_version_id,
            )
            .filter(context_tag.key == "context_type", context_tag.value == "frame")
            .filter(timestamp_tag.key == "timestamp")
            .all()
        )

        existing_timestamps = set()
        for obj in existing:
            for tag in obj.tags:
                if tag.key != "timestamp":
                    continue
                try:
                    existing_timestamps.add(float(tag.value))
                except (TypeError, ValueError):
                    # Missing or non numeric tag value: not a usable timestamp.
                    continue

        return existing_timestamps

    @classmethod
    def _create_chapter_frames(
        cls,
        chapters,
        duration,
        object_,
        output_dir,
        existing_timestamps,
        progress_updater=None,
    ):
        """Create a frame for every chapter that does not have one yet.

        :param chapters: iterable of dicts providing the chapter start in
            seconds under the ``"seconds"`` key.
        :param duration: duration of the video, in seconds.
        :param object_: master ``ObjectVersion`` the frames are extracted from.
        :param output_dir: directory where the frame files are written.
        :param existing_timestamps: timestamps (in seconds) that already have
            a frame; the collection is not modified.
        :param progress_updater: optional callable invoked with the 1-based
            index of each chapter, including the skipped ones.
        :returns: list with the paths of the frame files that were created.
        """
        created_frames = []
        # Work on a copy: timestamps handled during this run are added to it
        # so that two chapters at the same second produce a single frame.
        known_timestamps = set(existing_timestamps)

        with move_file_into_local(object_, delete=True) as url:
            for current_chapter, chapter in enumerate(chapters, start=1):
                if progress_updater:
                    progress_updater(current_chapter)

                chapter_seconds = chapter["seconds"]

                # Skip chapters that are beyond video duration
                if chapter_seconds > duration:
                    continue

                # For 0:00 chapters, use a small offset to avoid extraction
                # issues. The frame is tagged with this effective timestamp.
                if chapter_seconds == 0:
                    timestamp = cls._ZERO_CHAPTER_OFFSET
                else:
                    timestamp = chapter_seconds

                # Skip if a frame already exists at this timestamp (with some
                # tolerance). Both the chapter time and the effective
                # timestamp are checked: a frame stored for a 0:00 chapter
                # carries the offset timestamp, and a strict comparison
                # against 0 alone would never match it.
                timestamp_exists = any(
                    abs(known - candidate) < cls._TIMESTAMP_TOLERANCE
                    for known in known_timestamps
                    for candidate in (chapter_seconds, timestamp)
                )
                if timestamp_exists:
                    continue

                frame_filename = "chapter-{0}.jpg".format(int(chapter_seconds))
                frame_path = os.path.join(output_dir, frame_filename)

                try:
                    # Extract single frame at chapter timestamp using ff_frames
                    ff_frames(
                        input_file=url,
                        start=timestamp,
                        end=timestamp + cls._SINGLE_FRAME_WINDOW,
                        step=1,
                        duration=duration,
                        output=frame_path,
                    )

                    if os.path.exists(frame_path) and os.path.getsize(frame_path) > 0:
                        stream = file_opener_xrootd(frame_path, "rb")
                        try:
                            # Create ObjectVersion for chapter frame (as
                            # normal frame)
                            ExtractFramesTask._create_object(
                                bucket=object_.bucket,
                                key=frame_filename,
                                stream=stream,
                                size=os.path.getsize(frame_path),
                                media_type="image",
                                context_type="frame",
                                master_id=object_.version_id,
                                is_chapter_frame=True,
                                timestamp=timestamp,
                            )
                        finally:
                            # Do not leak one open file per chapter.
                            stream.close()

                        known_timestamps.add(timestamp)
                        created_frames.append(frame_path)

                except Exception as e:
                    # Deliberately best-effort: log the error but continue
                    # with the other chapters.
                    current_app.logger.error(
                        "Failed to extract frame for chapter at {0}s: {1}".format(
                            timestamp, str(e)
                        )
                    )
                    continue

        return created_frames
@@ -793,7 +1039,7 @@ def _update_flow_tasks(self, flow_tasks, status, message, **kwargs): opencast_publication_tag=current_app.config["CDS_OPENCAST_QUALITIES"][ quality ]["opencast_publication_tag"], - **kwargs # may contain `opencast_event_id` + **kwargs, # may contain `opencast_event_id` ) # JSONb cols needs to be assigned (not updated) to be persisted flow_task_metadata.payload = new_payload @@ -848,7 +1094,7 @@ def _start_transcodable_flow_tasks_or_cancel(self, wanted_qualities=None): new_payload.update( task_id=str(t.id), celery_task_id=str(self.request.id), - **self._base_payload + **self._base_payload, ) # JSONb cols needs to be assigned (not updated) to be persisted t.payload = new_payload diff --git a/cds/modules/previewer/templates/cds_previewer/macros/player.html b/cds/modules/previewer/templates/cds_previewer/macros/player.html index a7301dd58..a198cc557 100644 --- a/cds/modules/previewer/templates/cds_previewer/macros/player.html +++ b/cds/modules/previewer/templates/cds_previewer/macros/player.html @@ -53,44 +53,134 @@ }); window.top.player = player; - // Preload - player.source = { - sources: [ - { - {% if video_source %} - src: "{{ video_source }}", - type: 'application/x-mpegURL' - {% elif obj.m3u8_uri and obj.subformats|length > 0 %} - src: '{{ obj.m3u8_uri }}', - type: 'application/x-mpegURL' - {% else %} - src: '{{ obj.uri }}', - type: 'video/mp4' - {% endif %} - }, - ], - {% if not embed_config.subtitlesOff %} - textTracks: [ - { - kind: 'metadata', - src: '{{ obj.thumbnails_uri }}', - label: 'thumbnails', - default: true, - }, - {% for uri, lang in obj.subtitles %} - { - kind: 'subtitles', - src: '{{ uri }}', - label: '{{ lang }}', - srclang: '{{ lang }}', - {% if embed_config.subtitles and embed_config.subtitles == lang %} - default: true, - {% endif %} - }, - {% endfor %} - ], - {% endif %} - poster: '{{ obj.poster_uri }}', + + // --- Chapters helpers --- + function durationToSeconds(durationStr) { + if (!durationStr) return null; + const 
parts = durationStr.split(':').map(Number); // [HH, MM, SS] or [MM, SS] + if (parts.length === 3) { + return parts[0] * 3600 + parts[1] * 60 + parts[2]; + } + if (parts.length === 2) { + return parts[0] * 60 + parts[1]; + } + return null; + } + function cleanHtmlFromTitle(title) { + if (!title) return title; + + // Decode HTML entities by using a temporary DOM element + const temp = document.createElement("textarea"); + temp.innerHTML = title; + let decoded = temp.value; + decoded = decoded.replace(/<[^>]+>/g, " "); + decoded = decoded.replace(/\s+/g, " ").trim(); + + return decoded; + } + function parseChapters(description) { + if (!description) return []; + + const pattern = /(?:^|\n)\s*(\d{1,2}:(?:\d{1,2}:)?\d{1,2})\s*[-\s]*(.+?)(?=\n|$)/gm; + const chapters = []; + let match; + + while ((match = pattern.exec(description)) !== null) { + const [, timestampStr, title] = match; + + const timeParts = timestampStr.split(":"); + let totalSeconds; + + if (timeParts.length === 2) { + const [minutes, seconds] = timeParts.map(Number); + totalSeconds = minutes * 60 + seconds; + } else if (timeParts.length === 3) { + const [hours, minutes, seconds] = timeParts.map(Number); + totalSeconds = hours * 3600 + minutes * 60 + seconds; + } else { + continue; + } + + const cleanTitle = cleanHtmlFromTitle(title); + if (cleanTitle) { + chapters.push({ + startTime: totalSeconds, + text: cleanTitle + }); + } + } + + chapters.sort((a, b) => a.startTime - b.startTime); + + // Add endTime for VTT + const videoDuration = durationToSeconds({{ (record.duration if record and record.duration else "") | tojson }}); + for (let i = 0; i < chapters.length; i++) { + if (i + 1 < chapters.length) { + chapters[i].endTime = chapters[i + 1].startTime; + } else { + chapters[i].endTime = videoDuration || (chapters[i].startTime + 1); + } + } + + return chapters; + } + function formatVttTime(total) { + const h = Math.floor(total / 3600); + const m = Math.floor((total % 3600) / 60); + const s = 
Math.floor(total % 60); + return String(h).padStart(2,'0') + ':' + String(m).padStart(2,'0') + ':' + String(s).padStart(2,'0') + '.000'; + } + function buildChaptersVtt(description) { + const chapters = parseChapters(description); + if (!chapters.length) return null; + let vtt = 'WEBVTT\n\n'; + for (let i = 0; i < chapters.length; i++) { + const c = chapters[i]; + vtt += `${i+1}\n${formatVttTime(c.startTime)} --> ${formatVttTime(c.endTime)}\n${c.text}\n\n`; + } + return vtt; + } + + // Build textTracks + var textTracksArr = []; + // Chapters + var descStr = {{ (record.description if record and record.description else "") | tojson }}; + var vttStr = buildChaptersVtt(descStr); + if (vttStr) { + var chapUrl = URL.createObjectURL(new Blob([vttStr], { type: 'text/vtt' })); + textTracksArr.push({ kind: 'chapters', src: chapUrl, label: 'Chapters' }); + } + {% if not embed_config.subtitlesOff %} + textTracksArr.push({ kind: 'metadata', src: '{{ obj.thumbnails_uri }}', label: 'thumbnails', default: true }); + {% for uri, lang in obj.subtitles %} + textTracksArr.push({ + kind: 'subtitles', + src: '{{ uri }}', + label: '{{ lang }}', + srclang: '{{ lang }}' + {% if embed_config.subtitles and embed_config.subtitles == lang %}, default: true{% endif %} + }); + {% endfor %} + {% endif %} + + // Set source with textTracks + player.source = { + sources: [ + { + {% if video_source %} + src: "{{ video_source }}", + type: 'application/x-mpegURL' + {% elif obj.m3u8_uri and obj.subformats|length > 0 %} + src: '{{ obj.m3u8_uri }}', + type: 'application/x-mpegURL' + {% else %} + src: '{{ obj.uri }}', + type: 'video/mp4' + {% endif %} + } + ], + textTracks: textTracksArr, + poster: '{{ obj.poster_uri }}', {% if obj.vr %} vr: { 360: true, diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py index 88fcb3d8c..13b34ac89 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py 
@@ -19,7 +19,7 @@ """Video JSON schema.""" from invenio_jsonschemas import current_jsonschemas -from marshmallow import Schema, fields, pre_load, post_load +from marshmallow import Schema, fields, pre_load, post_load, post_dump from ....deposit.api import Video from ..fields.datetime import DateString @@ -43,6 +43,7 @@ TranslationsSchema, ) from .doi import DOI +from ...utils import parse_video_chapters class _CDSSSchema(Schema): @@ -166,6 +167,7 @@ class VideoSchema(StrictKeysSchema): ) collections = fields.List(fields.Str, many=True) additional_languages = fields.List(fields.Str, many=True) + chapters = fields.List(fields.Dict, dump_only=True) # Preservation fields location = fields.Str() @@ -177,3 +179,14 @@ def post_load(self, data, **kwargs): """Post load.""" data["$schema"] = current_jsonschemas.path_to_url(Video._schema) return data + + @post_dump(pass_many=False) + def post_dump(self, data, **kwargs): + """Post dump - add parsed chapters.""" + description = data.get('description', '') + if description: + data['chapters'] = parse_video_chapters(description) + else: + data['chapters'] = [] + + return data diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index a8444c7e1..f28126fec 100644 --- a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -4,9 +4,9 @@
    -
    -
    -
    +
    +
    +