diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index 72dad9104..96e97e038 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -9,9 +9,13 @@ name: CI
on:
push:
- branches: main
+ branches:
+ - main
pull_request:
- branches: main
+ branches:
+ - main
+ - additional-files
+
schedule:
# * is a special character in YAML so you have to quote this string
- cron: "0 4 * * 6"
diff --git a/Bruno Collection - CDS Videos Publish Video.json b/Bruno Collection - CDS Videos Publish Video.json
index fb6898f95..a1aed1578 100644
--- a/Bruno Collection - CDS Videos Publish Video.json
+++ b/Bruno Collection - CDS Videos Publish Video.json
@@ -4,38 +4,8 @@
"items": [
{
"type": "http",
- "name": "Step 6: Get Project to Check the Flow Status",
+      "name": "Optional: Update the Access of the Video",
"seq": 7,
- "request": {
- "url": "{{baseURL}}/api/deposits/project/{{project_id}}",
- "method": "GET",
- "headers": [
- {
- "name": "content-type",
- "value": "application/vnd.project.partial+json",
- "enabled": true
- }
- ],
- "params": [],
- "body": {
- "mode": "json",
- "json": "",
- "formUrlEncoded": [],
- "multipartForm": []
- },
- "script": {},
- "vars": {},
- "assertions": [],
- "tests": "",
- "auth": {
- "mode": "inherit"
- }
- }
- },
- {
- "type": "http",
- "name": "Optional: Update the Access of the Video",
- "seq": 6,
"request": {
"url": "{{baseURL}}/api/deposits/video/{{video_id}}",
"method": "PUT",
@@ -79,7 +49,7 @@
"params": [],
"body": {
"mode": "json",
- "json": "{\n \"$schema\": \"https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json\",\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n \"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}",
+ "json": "{\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n \"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}",
"formUrlEncoded": [],
"multipartForm": []
},
@@ -111,7 +81,7 @@
"params": [],
"body": {
"mode": "json",
- "json": "{\n \"$schema\":\"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json\",\n \"_project_id\":\"{{project_id}}\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}",
+          "json": "{\n \"_project_id\":\"{{project_id}}\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}",
"formUrlEncoded": [],
"multipartForm": []
},
@@ -184,7 +154,9 @@
"formUrlEncoded": [],
"multipartForm": []
},
- "script": {},
+ "script": {
+ "res": "let data = res.body;\nbru.setEnvVar(\"main_video_key\", data.key);"
+ },
"vars": {},
"assertions": [],
"tests": "",
@@ -195,12 +167,18 @@
},
{
"type": "http",
- "name": "Step 5: Upload additional file",
+ "name": "Optional: Upload additional file",
"seq": 5,
"request": {
"url": "{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}",
"method": "PUT",
- "headers": [],
+ "headers": [
+ {
+ "name": "X-Invenio-File-Tags",
+ "value": "context_type=additional_file",
+ "enabled": true
+ }
+ ],
"params": [],
"body": {
"mode": "json",
@@ -221,8 +199,38 @@
},
{
"type": "http",
- "name": "Step 7: Publish",
+ "name": "Step 5: Get Video to Check the Flow Status",
"seq": 8,
+ "request": {
+ "url": "{{baseURL}}/api/deposits/video/{{video_id}}",
+ "method": "GET",
+ "headers": [
+ {
+ "name": "content-type",
+ "value": "application/vnd.project.partial+json",
+ "enabled": true
+ }
+ ],
+ "params": [],
+ "body": {
+ "mode": "json",
+ "json": "",
+ "formUrlEncoded": [],
+ "multipartForm": []
+ },
+ "script": {},
+ "vars": {},
+ "assertions": [],
+ "tests": "",
+ "auth": {
+ "mode": "inherit"
+ }
+ }
+ },
+    {
+      "type": "http",
+      "name": "Step 6: Publish",
+ "seq": 9,
"request": {
"url": "{{baseURL}}/api/deposits/video/{{video_id}}/actions/publish",
"method": "POST",
@@ -248,9 +256,40 @@
"mode": "inherit"
}
}
+ },
+ {
+ "type": "http",
+ "name": "Optional: Replace Main File",
+ "seq": 6,
+ "request": {
+ "url": "{{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}",
+ "method": "PUT",
+ "headers": [
+ {
+ "name": "X-Invenio-File-Tags",
+ "value": "times_replaced=1",
+ "enabled": true
+ }
+ ],
+ "params": [],
+ "body": {
+ "mode": "json",
+ "json": "",
+ "formUrlEncoded": [],
+ "multipartForm": []
+ },
+ "script": {
+ "req": "const fs = require('fs');\nconst path = require('path');\n\n// File details\nconst filename = \"video_name.mp4\"; // CHANGE HERE\nconst filePath = \"video_file_path\"; // CHANGE HERE\n\n// Read the file as raw binary data\nconst fileContent = fs.readFileSync(filePath);\n\n// Set request headers\nreq.setHeader(\"Accept\", \"application/json, text/plain, */*\"); \nreq.setHeader(\"Accept-Encoding\", \"gzip, deflate, br, zstd\"); \nreq.setHeader(\"Content-Length\", fileContent.length);\n\n// Attach the file content as the request body\nreq.setBody(fileContent);\n"
+ },
+ "vars": {},
+ "assertions": [],
+ "tests": "",
+ "auth": {
+ "mode": "inherit"
+ }
+ }
}
],
- "activeEnvironmentUid": "O01m8KLYsgrkGRjOSv443",
"environments": [
{
"variables": [
@@ -284,9 +323,6 @@
"allow": true
}
},
- "ignore": [
- "node_modules",
- ".git"
- ]
+ "ignore": ["node_modules", ".git"]
}
-}
\ No newline at end of file
+}
diff --git a/README.rst b/README.rst
index b27d6d768..6fb1036a2 100644
--- a/README.rst
+++ b/README.rst
@@ -33,10 +33,13 @@ Table of Contents
- `Step 2: Create a Video <#step-2-create-a-video>`_
- `Step 3: Upload the Video <#step-3-upload-the-video>`_
- `Step 4: Create a Flow <#step-4-create-a-flow>`_
- - `Step 5: (Optional) Upload Additional File <#step-5-optional-upload-additional-file>`_
+ - `Optional: Upload Additional File <#optional-upload-additional-file>`_
- `Optional: Update the Access of the Video <#optional-update-the-access-of-the-video>`_
- - `Step 6: Get Project to Check the Flow Status <#step-6-get-project-to-check-the-flow-status>`_
- - `Step 7: Publish Video <#step-7-publish-video>`_
+ - `Step 5: Get Video to Check the Flow Status <#step-5-get-video-to-check-the-flow-status>`_
+ - `Step 6: Publish Video <#step-6-publish-video>`_
+- `Replace the Main Video File through REST API <#replace-the-main-video-file-through-rest-api>`_
+ - `General Flow <#general-flow>`_
+ - `Alternative: Without Doing the Get Request <#alternative-without-doing-the-get-request>`_
Prerequisites
@@ -228,6 +231,7 @@ If you'd like to use the pre-configured REST API collection in Bruno, ensure you
- Download this `Bruno collection <./Bruno%20Collection%20-%20CDS%20Videos%20Publish%20Video.json>`_.
- Open Bruno and import downloaded collection.
+ - Switch to **Developer Mode**.
- Create an environment for the collection.
- Configure the environment by adding a variable named ``baseURL``. Set its value to your API base URL (e.g., ``http://localhost:5000``).
@@ -258,11 +262,6 @@ Step 1: Create a Project
- **Location**
- **Description**
- **Required/Optional**
- * - **$schema**
- - string
- - body
- - Schema URL for the project creation.
- - Required
* - **category**
- string
- body
@@ -307,7 +306,6 @@ To restrict the project, add ``_access/read``:
.. code-block:: json
{
- "$schema": "https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json",
"_access": {
"update": [
"admin@test.ch",
@@ -355,7 +353,7 @@ To restrict the project, add ``_access/read``:
**Response:**
-Created project JSON.
+Created project JSON. Save ``response.body.project_id`` as ``_project_id`` for later use.
Step 2: Create a Video
@@ -379,11 +377,6 @@ Step 2: Create a Video
- **Location**
- **Description**
- **Required/Optional**
- * - **$schema**
- - string
- - body
- - Schema URL for video creation.
- - Required
* - **_project_id**
- string
- body
@@ -423,7 +416,7 @@ Step 2: Create a Video
- string
- body
- Language of the video.
- - Optional
+ - Required
* - **featured**
- boolean
- body
@@ -447,7 +440,6 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the
.. code-block:: json
{
- "$schema":"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json",
"_project_id":"{{project_id}}",
"title":
{
@@ -495,12 +487,13 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the
"name": "related link",
"url": "https://relatedlink"
}
- ]
+ ],
+ "language": "en"
}
**Response:**
-Created video JSON.
+Created video JSON. Save ``response.body.id`` as ``video_id`` and ``response.body.metadata._buckets.deposit`` as ``bucket_id`` for later use.
Step 3: Upload the Video
@@ -542,7 +535,7 @@ Step 3: Upload the Video
**Response:**
-Uploaded video JSON.
+Uploaded video JSON. Save ``response.body.version_id`` as ``main_file_version_id`` and ``response.body.key`` as ``video_key`` for later use.
Step 4: Create a Flow
@@ -595,16 +588,20 @@ Step 4: Create a Flow
**Response:**
-Created flow JSON.
+Created flow JSON. If you want to replace the main video file later, save ``response.body.key`` as ``main_video_key``.
-Step 5: (Optional) Upload Additional File
+Optional: Upload Additional File
------------------------------------------
**Request:**
``PUT`` ``{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}``
+**Headers:**
+
+- ``X-Invenio-File-Tags: context_type=additional_file``
+
**Parameters:**
.. list-table::
@@ -629,10 +626,6 @@ Step 5: (Optional) Upload Additional File
- To include the file in the body, modify the `pre-request script` in Bruno.
-**Response:**
-
-Uploaded additional file JSON.
-
Optional: Update the Access of the Video
----------------------------------------
@@ -682,12 +675,12 @@ To restrict the video, add ``_access/read``. If you want to change the access/up
Updated video JSON.
-Step 6: Get Project to Check the Flow Status
+Step 5: Get Video to Check the Flow Status
--------------------------------------------
**Request:**
-``GET`` ``{{baseURL}}/api/deposits/project/{{project_id}}``
+``GET`` ``{{baseURL}}/api/deposits/video/{{video_id}}``
**Headers:**
@@ -702,19 +695,18 @@ Step 6: Get Project to Check the Flow Status
- **Type**
- **Location**
- **Description**
- * - **project_id**
+ * - **video_id**
- string
- path
- - ID of the project.
+ - ID of the video.
**Response:**
-Updated project JSON with flow status as ``state``:
+Updated video JSON with flow status. You can find the flow status in ``response.body.metadata._cds.state``:
.. code-block:: json
{
- "id": "b320568fc1264dda90a8f459be42892e",
"_cds": {
"state": {
"file_transcode": "STARTED",
@@ -725,7 +717,7 @@ Updated project JSON with flow status as ``state``:
}
-Step 7: Publish Video
+Step 6: Publish Video
----------------------
Before publishing the video, ensure that the workflow is complete.
@@ -757,3 +749,180 @@ Before publishing the video, ensure that the workflow is complete.
Published video deposit JSON.
+
+Replace the Main Video File through REST API
+============================================
+
+General Flow
+------------
+
+1. Get the video (see `Step 5 <#step-5-get-video-to-check-the-flow-status>`_) and find the master file key from the response.
+
+ **Request:**
+
+ ``GET {{baseURL}}/api/deposits/video/{{video_id}}``
+
+ **Headers:**
+
+ - ``content-type: application/vnd.project.partial+json``
+
+ **Parameters:**
+
+ .. list-table::
+ :header-rows: 1
+
+ * - **Name**
+ - **Type**
+ - **Location**
+ - **Description**
+ * - **video_id**
+ - string
+ - path
+ - ID of the video.
+
+ **Response:**
+
+ Video JSON. You can find the main file inside ``response.body.metadata._files``.
+
+ .. code-block:: javascript
+
+ let files = data.metadata?._files || [];
+ // Find the master file
+ let masterFile = files.find(f => f.context_type === "master");
+ video_key = masterFile.key;
+
+
+2. Upload the new video with the same master key and same ``bucket_id`` (see `Step 3 <#step-3-upload-the-video>`_)
+
+ **Upload Request**
+
+ ``PUT {{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}``
+
+ **Headers:**
+
+ - ``X-Invenio-File-Tags: times_replaced=number_of_times_replaced``
+
+ **Parameters:**
+
+ .. list-table::
+ :header-rows: 1
+
+ * - **Name**
+ - **Type**
+ - **Location**
+ - **Description**
+ * - **bucket_id**
+ - string
+ - path
+ - ID of the bucket to upload the file.
+ * - **main_video_key**
+ - string
+ - path
+ - Key of the previous main file.
+ * - **file**
+ - file
+ - body
+ - The file to be uploaded.
+
+ **Response:**
+
+ Uploaded file JSON. Save version_id and key for later use:
+
+   - ``response.body.version_id`` → ``main_file_version_id``
+ - ``response.body.key`` → ``video_key``
+
+
+
+3. Start the flow with your new ``video_key`` and ``version_id`` but keep the same ``bucket_id`` and ``deposit_id`` (see `Step 4 <#step-4-create-a-flow>`_)
+
+ **Request:**
+
+   ``POST {{baseURL}}/api/flows/``
+
+ **Headers:**
+
+ - ``content-type: application/vnd.project.partial+json``
+
+ **Parameters:**
+
+ .. list-table::
+ :header-rows: 1
+
+ * - **Name**
+ - **Type**
+ - **Location**
+ - **Description**
+ * - **version_id**
+ - string
+ - body
+ - Version ID from the uploaded video response.
+ * - **key**
+ - string
+ - body
+ - Video key from the uploaded video response.
+ * - **bucket_id**
+ - string
+ - body
+ - Bucket ID from the Create Video response.
+ * - **deposit_id**
+ - string
+ - body
+ - Deposit ID from the Create Video response.
+
+ **Body:**
+
+ .. code-block:: json
+
+ {
+ "version_id": "{{main_file_version_id}}",
+ "key": "{{video_key}}",
+ "bucket_id": "{{bucket_id}}",
+ "deposit_id": "{{video_id}}"
+ }
+
+
+Alternative: Without Doing the Get Request
+------------------------------------------
+
+If you want to integrate this process into your workflow **without calling the Get Video request**,
+you must be careful about which **video key** you are using, since it changes during different stages.
+
+**⚠️ Important: Using the Correct Video Key**
+
+The ``video_key`` changes and you must use the correct key depending on when you're performing the replacement:
+
+- **Scenario 1: Replacing after initial file upload (before creating flow)**
+
+ - Use the ``video_key`` returned from the upload file request response.
+
+- **Scenario 2: Replacing after creating the flow (before publishing)**
+
+ - Use the ``key`` value from the Create Flow response.
+
+ This is required because the backend **renames the uploaded file** to distinguish it from automatically generated subformat files.
+
+- **Scenario 3: Replacing after publishing the video**
+
+ - First make an edit request to modify the published video.
+
+ - ``POST {{baseURL}}/api/deposits/video/{{deposit_id}}/actions/edit``
+
+ - Find the master file key from the response:
+
+ .. code-block:: javascript
+
+ let files = data.metadata?._files || [];
+ // Find the master file
+ let masterFile = files.find(f => f.context_type === "master");
+ video_key = masterFile.key;
+
+ - Use this ``video_key`` for the replacement request.
+
+
+Do **not** use the original video file name (``video_name``) for replacement requests,
+as this will not work due to the backend file renaming process.
+
+After finding the correct key, you can upload your new file (see `Step 3 <#step-3-upload-the-video>`_).
+
+Then, start the flow again using the new main video file, along with the updated ``version_id`` and ``video_key``.
+You can follow the same structure outlined in `Step 4 <#step-4-create-a-flow>`_.
diff --git a/cds/config.py b/cds/config.py
index 770a5e5c3..5a31302a0 100644
--- a/cds/config.py
+++ b/cds/config.py
@@ -290,6 +290,7 @@ def _parse_env_bool(var_name, default=None):
SEARCH_UI_VIDEO_MEDIUM = "templates/cds/video/featured-medium.html"
# Angular template for small size (used for search results)
SEARCH_UI_VIDEO_SMALL = "templates/cds/video/small.html"
+HOMEPAGE_VIDEO_SMALL = "templates/cds/video/small_video_card.html"
# Invenio-Stats
# =============
@@ -999,6 +1000,44 @@ def _parse_env_bool(var_name, default=None):
},
]
+# Dynamic query sections: each entry carries a display ``label``, a records
+# API ``query``, a search query string ``qs`` and a list of ``subcategories``
+# (``label`` + ``qs``). Nested collections are addressed with the ``::``
+# separator, e.g. ``Lectures::Talks, Seminars and Other Events::Student Lectures``.
+# NOTE(review): presumably consumed by the homepage templates - confirm usage.
+HOMEPAGE_DYNAMIC_QUERIES = [
+    {
+        "label": "LATEST LECTURES",
+        "query": "/api/records/?size=4&sort=mostrecent&q=collections:Lectures",
+        "qs": "collections=Lectures",
+        "subcategories": [
+            {
+                "label": "Academic Training Lectures",
+                "qs": 'collections="Lectures::Academic Training Lectures"',
+            },
+            {
+                "label": "CERN Accelerator School Lectures",
+                "qs": 'collections="Lectures::CERN Accelerator School"',
+            },
+            {
+                "label": "E-Learning",
+                "qs": 'collections="Lectures::E-learning modules"',
+            },
+            {
+                "label": "Conference records",
+                "qs": 'collections="Lectures::Talks, Seminars and Other Events::Conference records"',
+            },
+            {
+                "label": "Scientific Seminars and Workshops",
+                "qs": 'collections="Lectures::Talks, Seminars and Other Events::Scientific Seminars and Workshops"',
+            },
+            {
+                "label": "Teacher Programmes",
+                "qs": 'collections="Lectures::Talks, Seminars and Other Events::Teacher Programmes"',
+            },
+            {
+                "label": "Student Lectures",
+                "qs": 'collections="Lectures::Talks, Seminars and Other Events::Student Lectures"',
+            },
+        ],
+    }
+]
+
FRONTPAGE_TREND_TOPICS = [
{
"label": "Antimatter",
@@ -1045,6 +1084,11 @@ def _parse_env_bool(var_name, default=None):
"qs": 'q=keywords.name:"VNR" OR keywords.name:"video news release"',
},
]
+###############################################################################
+# Record Landing page
+###############################################################################
+
+DESCRIPTION_PREVIEW_LINES = 10
###############################################################################
# Security
@@ -1088,7 +1132,7 @@ def _parse_env_bool(var_name, default=None):
"'unsafe-inline'",
],
"img-src": ["'self'", "https://*.theoplayer.com", "data:"],
- "connect-src": ["'self'", "https://*.theoplayer.com", "https://*.cern.ch"],
+ "connect-src": ["'self'", "https://*.theoplayer.com", "https://*.cern.ch", "blob:"],
"object-src": ["'self'"],
"media-src": ["'self'", "blob:"],
"frame-src": ["'self'", "https://*.theoplayer.com"],
@@ -1129,7 +1173,7 @@ def _parse_env_bool(var_name, default=None):
"https://auth.cern.ch/auth/realms/cern/protocol/openid-connect/userinfo",
)
-OAUTHCLIENT_CERN_OPENID_ALLOWED_ROLES = ["cern-user"]
+OAUTHCLIENT_CERN_OPENID_ALLOWED_ROLES = ["cern-user", "authenticated-user"]
OAUTHCLIENT_CERN_OPENID_REFRESH_TIMEDELTA = timedelta(minutes=-5)
"""Default interval for refreshing CERN extra data (e.g. groups).
@@ -1258,6 +1302,7 @@ def _parse_env_bool(var_name, default=None):
# Licence key and base URL for THEO player
THEOPLAYER_LIBRARY_LOCATION = None
THEOPLAYER_LICENSE = None
+
# Wowza server URL for m3u8 playlist generation
WOWZA_PLAYLIST_URL = (
"https://wowza.cern.ch/cds/_definist_/smil:" "{filepath}/playlist.m3u8"
@@ -1633,3 +1678,6 @@ def _parse_env_bool(var_name, default=None):
# Sets the location to share the video files among the different tasks
CDS_FILES_TMP_FOLDER = "/tmp/videos"
+
+# TODO: needs latest files-rest enabling range requests
+FILES_REST_ALLOW_RANGE_REQUESTS = True
diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py
index 4c19ec5c3..089fdbea9 100644
--- a/cds/modules/deposit/api.py
+++ b/cds/modules/deposit/api.py
@@ -65,6 +65,7 @@
get_tasks_status_grouped_by_task_name,
merge_tasks_status,
)
+from ..flows.tasks import ExtractChapterFramesTask
from ..flows.models import FlowMetadata
from ..invenio_deposit.api import Deposit, has_status, preserve
from ..invenio_deposit.utils import mark_as_action
@@ -76,7 +77,7 @@
)
from ..records.minters import cds_doi_generator, is_local_doi, report_number_minter
from ..records.resolver import record_resolver
-from ..records.utils import is_record, lowercase_value
+from ..records.utils import is_record, lowercase_value, parse_video_chapters, get_existing_chapter_frame_timestamps
from ..records.validators import PartialDraft4Validator
from ..records.permissions import is_public
from .errors import DiscardConflict
@@ -504,7 +505,7 @@ def create(cls, data, id_=None, **kwargs):
data.setdefault("_access", {})
access_update = data["_access"].setdefault("update", [])
try:
- if current_user.email not in access_update:
+ if current_user.email not in access_update:
# Add the current user to the ``_access.update`` list
access_update.append(current_user.email)
except AttributeError:
@@ -869,7 +870,7 @@ def _rename_subtitles(self):
)
# copy tags to the newly created object version
for tag in subtitle_obj.tags:
- tag.object_version = obj
+ ObjectVersionTag.create_or_update(obj, tag.key, tag.value)
subtitle_obj.remove()
def _rename_master_file(self, master_file):
@@ -905,11 +906,78 @@ def _publish_edited(self):
return super(Video, self)._publish_edited()
+    def _has_chapters_changed(self, old_record=None):
+        """Tell whether chapter frames need to be (re-)extracted.
+
+        Chapters are parsed from the ``description`` field with
+        ``parse_video_chapters``.
+
+        :param old_record: the previously published record, or ``None`` when
+            there is none (first publish).
+        :returns: ``True`` when there is no old record and chapters exist,
+            when the chapter count, timestamps or titles differ from the old
+            record, or when the number of existing chapter frames does not
+            match the number of chapters; ``False`` otherwise.
+        """
+        new_chapters = parse_video_chapters(self.get("description", ""))
+
+        # No published version yet: any chapter at all triggers extraction.
+        if old_record is None:
+            return len(new_chapters) > 0
+
+        previous_chapters = parse_video_chapters(old_record.get("description", ""))
+
+        # A different number of chapters is a change by itself.
+        if len(new_chapters) != len(previous_chapters):
+            return True
+
+        # Same count: look for any pair that differs in timestamp or title.
+        if any(
+            new["seconds"] != prev["seconds"] or new["title"] != prev["title"]
+            for new, prev in zip(new_chapters, previous_chapters)
+        ):
+            return True
+
+        # Chapters did not change; still report a change when the existing
+        # chapter frames don't match the number of chapters.
+        return len(new_chapters) != len(get_existing_chapter_frame_timestamps(self))
+
+    def _trigger_chapter_frame_extraction(self):
+        """Submit an asynchronous ``ExtractChapterFramesTask`` for this video.
+
+        The task is called with a copy of the payload of the flow returned by
+        ``FlowMetadata.get_by_deposit`` for this deposit id.
+
+        This is best effort: when no flow is found a warning is logged and
+        nothing is submitted, and any exception raised while looking up the
+        flow or submitting the task is logged with its traceback instead of
+        being propagated to the caller.
+        """
+        try:
+            # Get the current flow for this deposit
+            current_flow = FlowMetadata.get_by_deposit(self["_deposit"]["id"])
+
+            if current_flow is None:
+                current_app.logger.warning(
+                    f"No current flow found for video {self.id}. Cannot trigger chapter frame extraction."
+                )
+                return
+
+            current_app.logger.info(
+                f"Triggering asynchronous ExtractChapterFramesTask for video {self.id} with flow {current_flow.id}"
+            )
+
+            # Work on a copy so the stored flow payload is not mutated.
+            payload = current_flow.payload.copy()
+
+            current_app.logger.info(f"Submitting ExtractChapterFramesTask with payload: {payload}")
+
+            ExtractChapterFramesTask().s(**payload).apply_async()
+
+            current_app.logger.info(
+                f"ExtractChapterFramesTask submitted asynchronously for video {self.id}, flow_id: {current_flow.id}"
+            )
+        except Exception as e:
+            # ``logger.exception`` logs at ERROR level and attaches the
+            # traceback of the exception currently being handled.
+            current_app.logger.exception(
+                f"Failed to trigger async chapter frame extraction for video {self.id}: {e}"
+            )
+
@mark_as_action
- def publish(self, pid=None, id_=None, **kwargs):
+ def publish(self, pid=None, id_=None, extract_chapters=True, **kwargs):
"""Publish a video and update the related project."""
# save a copy of the old PID
video_old_id = self["_deposit"]["id"]
+
+ # Check if this is a republish and get the old record
+ old_record = None
+ try:
+ _, old_record = self.fetch_published()
+ except KeyError as e: # First publish (no pid key)
+ pass
+
try:
self["category"] = self.project["category"]
self["type"] = self.project["type"]
@@ -930,6 +998,13 @@ def publish(self, pid=None, id_=None, **kwargs):
video_published = super(Video, self).publish(pid=pid, id_=id_, **kwargs)
_, record_new = self.fetch_published()
+ # Check if chapters have changed and trigger frame extraction
+ if extract_chapters and self._has_chapters_changed(old_record):
+ current_app.logger.info(
+ f"Chapters changed for video {self.id}, triggering frame extraction"
+ )
+ self._trigger_chapter_frame_extraction()
+
# update associated project
video_published.project._update_videos(
[video_build_url(video_old_id)],
@@ -1088,7 +1163,6 @@ def _create_tags(self):
except IndexError:
return
-
def mint_doi(self):
"""Mint DOI."""
assert self.has_record()
@@ -1109,7 +1183,7 @@ def mint_doi(self):
status=PIDStatus.RESERVED,
)
return self
-
+
project_resolver = Resolver(
pid_type="depid",
diff --git a/cds/modules/deposit/ext.py b/cds/modules/deposit/ext.py
index 0675488b1..f4a9d6466 100644
--- a/cds/modules/deposit/ext.py
+++ b/cds/modules/deposit/ext.py
@@ -25,11 +25,13 @@
"""CDSDeposit app for Webhook receivers."""
import re
+import mimetypes
from invenio_base.signals import app_loaded
from invenio_db import db
from invenio_files_rest.models import ObjectVersionTag
from invenio_files_rest.signals import file_uploaded
+from invenio_files_rest.errors import InvalidKeyError
from invenio_indexer.signals import before_record_index
from invenio_records_files.utils import sorted_files_from_bucket
@@ -45,38 +47,37 @@
def _create_tags(obj):
"""Create additional tags for file."""
- # Subtitle file
- pattern = re.compile(".*_([a-zA-Z]{2})\.vtt$")
+ pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$")
+ pattern_poster = re.compile(r"^poster\.(jpg|png)$")
+
+ # Get the media_type and content_type(file ext)
+ file_name = obj.key
+ mimetypes.add_type("subtitle/vtt", ".vtt")
+ guessed_type = mimetypes.guess_type(file_name)[0]
+ if guessed_type is None:
+ raise InvalidKeyError(description=f"Unsupported File: {file_name}")
+
+ media_type = guessed_type.split("/")[0]
+ file_ext = guessed_type.split("/")[1]
+
with db.session.begin_nested():
- # language tag
- found = pattern.findall(obj.key)
- if len(found) == 1:
- lang = found[0]
- ObjectVersionTag.create_or_update(obj, "language", lang)
- else:
- # clean to be sure there is no some previous value
- ObjectVersionTag.delete(obj, "language")
- # other tags
- ObjectVersionTag.create_or_update(obj, "content_type", "vtt")
- ObjectVersionTag.create_or_update(obj, "context_type", "subtitle")
- ObjectVersionTag.create_or_update(obj, "media_type", "subtitle")
- # refresh object
- db.session.add(obj)
+ ObjectVersionTag.create_or_update(obj, "content_type", file_ext)
+ ObjectVersionTag.create_or_update(obj, "media_type", media_type)
+ if file_ext == "vtt":
+ # language tag
+ match = pattern_subtitle.search(file_name)
+ if match:
+ ObjectVersionTag.create_or_update(obj, "language", match.group(1))
+ else:
+ ObjectVersionTag.delete(obj, "language")
+ # other tags
+ ObjectVersionTag.create_or_update(obj, "content_type", "vtt")
+ ObjectVersionTag.create_or_update(obj, "context_type", "subtitle")
+ # poster tag
+ elif pattern_poster.match(file_name):
+ ObjectVersionTag.create_or_update(obj, "context_type", "poster")
- # Poster frame
- pattern = re.compile("^poster\.(jpg|png)$")
- try:
- poster = pattern.findall(obj.key)
- if poster:
- ext = pattern.findall(poster.key)[0]
- # frame tags
- ObjectVersionTag.create_or_update(poster, "content_type", ext)
- ObjectVersionTag.create_or_update(poster, "context_type", "poster")
- ObjectVersionTag.create_or_update(poster, "media_type", "image")
- # refresh object
- db.session.add(poster)
- except IndexError:
- return
+ db.session.add(obj)
def create_tags_on_file_upload(sender, obj):
diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json
index ae4cbfb87..01969cb3c 100644
--- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json
+++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json
@@ -228,6 +228,9 @@
"recid": {
"type": "double"
},
+ "legacy_recid": {
+ "type": "double"
+ },
"license": {
"properties": {
"license": {
@@ -333,6 +336,133 @@
"type": "keyword"
}
}
+ },
+ "alternate_identifiers": {
+ "properties": {
+ "scheme": {
+ "type": "keyword"
+ },
+ "value": {
+ "type": "keyword"
+ }
+ }
+ },
+ "_curation": {
+ "type": "object",
+ "properties": {
+ "legacy_report_number": {
+ "type": "keyword"
+ },
+ "department": {
+ "type": "keyword"
+ },
+ "volumes": {
+ "type": "text"
+ },
+ "physical_location": {
+ "type": "text"
+ },
+ "physical_medium": {
+ "type": "text"
+ },
+ "internal_note": {
+ "type": "text"
+ },
+ "digitized": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "text"
+ },
+ "format": {
+ "type": "text"
+ },
+ "link_text": {
+ "type": "text"
+ },
+ "public_note": {
+ "type": "text"
+ },
+ "nonpublic_note": {
+ "type": "text"
+ },
+ "md5_checksum": {
+ "type": "text"
+ },
+ "source": {
+ "type": "text"
+ }
+ }
+ },
+ "legacy_marc_fields": {
+ "type": "object",
+ "properties": {
+ "964": {
+ "type": "text"
+ },
+ "336": {
+ "type": "text"
+ },
+ "583": {
+ "type": "text"
+ },
+ "306": {
+ "type": "text"
+ }
+ }
+ }
+ }
+ },
+ "additional_titles": {
+ "type": "object",
+ "properties": {
+ "title": {
+ "type": "text"
+ },
+ "type": {
+ "type": "keyword"
+ },
+ "lang": {
+ "type": "keyword"
+ }
+ }
+ },
+ "additional_descriptions": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "text"
+ },
+ "type": {
+ "type": "keyword"
+ },
+ "lang": {
+ "type": "keyword"
+ }
+ }
+ },
+ "related_identifiers": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "text"
+ },
+ "scheme": {
+ "type": "keyword"
+ },
+ "relation_type": {
+ "type": "keyword"
+ },
+ "resource_type": {
+ "type": "keyword"
+ }
+ }
+ },
+ "collections": {
+ "type": "keyword"
+ },
+ "additional_languages": {
+ "type": "text"
}
}
}
diff --git a/cds/modules/deposit/receivers.py b/cds/modules/deposit/receivers.py
index 18727fec7..d5949a39d 100644
--- a/cds/modules/deposit/receivers.py
+++ b/cds/modules/deposit/receivers.py
@@ -33,6 +33,7 @@
from cds.modules.flows.tasks import (
DownloadTask,
ExtractFramesTask,
+ ExtractChapterFramesTask,
ExtractMetadataTask,
TranscodeVideoTask,
)
@@ -87,4 +88,5 @@ def register_celery_class_based_tasks(sender, app=None):
celery.register_task(ExtractMetadataTask())
celery.register_task(DownloadTask())
celery.register_task(ExtractFramesTask())
+ celery.register_task(ExtractChapterFramesTask())
celery.register_task(TranscodeVideoTask())
diff --git a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json
index 2e91fa703..0729bb117 100644
--- a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json
+++ b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json
@@ -142,6 +142,7 @@
"Camera Operator",
"Comments by",
"Co-Producer",
+ "ContactPerson",
"Creator",
"Credits",
"Director",
@@ -150,7 +151,9 @@
"Narrator",
"Photography",
"Producer",
+ "RelatedPerson",
"Reporter",
+ "ResearchGroup",
"Screenwriter",
"Speaker",
"Subtitles by",
diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json
new file mode 100644
index 000000000..e1c217180
--- /dev/null
+++ b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json
@@ -0,0 +1,57 @@
+{
+ "languages": {
+ "enum": [
+ "ar",
+ "ast",
+ "bg",
+ "ca",
+ "ch",
+ "cs",
+ "cy",
+ "da",
+ "de",
+ "el",
+ "en",
+ "en-fr",
+ "es",
+ "et",
+ "eu",
+ "fi",
+ "fr",
+ "ga",
+ "gd",
+ "gl",
+ "he",
+ "hi",
+ "hr",
+ "hu",
+ "it",
+ "lt",
+ "ja",
+ "ka",
+ "ko",
+ "kw",
+ "nb",
+ "nl",
+ "nn",
+ "no",
+ "pl",
+ "pt",
+ "rm",
+ "ro",
+ "ru",
+ "se",
+ "silent",
+ "sk",
+ "sl",
+ "sr",
+ "sv",
+ "tr",
+ "uk",
+ "ur",
+ "zh",
+ "zh_CN",
+ "zh_TW"
+ ]
+ }
+}
diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json
index 8c84e84bd..ae15d3d3c 100644
--- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json
+++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json
@@ -62,6 +62,7 @@
"Camera Operator",
"Comments by",
"Co-Producer",
+ "ContactPerson",
"Creator",
"Credits",
"Director",
@@ -70,7 +71,9 @@
"Narrator",
"Photography",
"Producer",
+ "RelatedPerson",
"Reporter",
+ "ResearchGroup",
"Screenwriter",
"Speaker",
"Subtitles by",
@@ -190,6 +193,24 @@
"description": "List of identifiers on external systems.",
"title": "External identifiers"
},
+    "alternate_identifiers": {
+      "items": {
+        "properties": {
+          "scheme": {
+            "title": "Scheme of the identifier (Vocabulary)",
+            "type": "string",
+            "enum": ["URL", "DOI", "CDS"]
+          },
+          "value": {
+            "title": "Value of the identifier",
+            "type": "string"
+          }
+        },
+        "required": ["value", "scheme"],
+        "type": "object"
+      },
+      "title": "List of alternate identifiers of the record",
+      "type": "array"
+    },
"$schema": {
"type": "string"
},
@@ -441,59 +462,7 @@
},
"language": {
"default": "en",
- "enum": [
- "ar",
- "ast",
- "bg",
- "ca",
- "ch",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "en-fr",
- "es",
- "et",
- "eu",
- "fi",
- "fr",
- "ga",
- "gd",
- "gl",
- "he",
- "hi",
- "hr",
- "hu",
- "it",
- "lt",
- "ja",
- "ka",
- "ko",
- "kw",
- "nb",
- "nl",
- "nn",
- "no",
- "pl",
- "pt",
- "rm",
- "ro",
- "ru",
- "se",
- "silent",
- "sk",
- "sl",
- "sr",
- "sv",
- "tr",
- "uk",
- "ur",
- "zh",
- "zh_CN",
- "zh_TW"
- ],
+ "$ref": "definitions-v1.0.0.json#/languages",
"type": "string",
"description": "A language of the resource."
},
@@ -563,59 +532,7 @@
"language": {
"description": "A language of the resource.",
"default": "en",
- "enum": [
- "ar",
- "ast",
- "bg",
- "ca",
- "ch",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "en-fr",
- "es",
- "et",
- "eu",
- "fi",
- "fr",
- "ga",
- "gd",
- "gl",
- "he",
- "hi",
- "hr",
- "hu",
- "it",
- "lt",
- "ja",
- "ka",
- "ko",
- "kw",
- "nb",
- "nl",
- "nn",
- "no",
- "pl",
- "pt",
- "rm",
- "ro",
- "ru",
- "se",
- "silent",
- "sk",
- "sl",
- "sr",
- "sv",
- "tr",
- "uk",
- "ur",
- "zh",
- "zh_CN",
- "zh_TW"
- ],
+ "$ref": "definitions-v1.0.0.json#/languages",
"type": "string"
},
"license": {
@@ -696,11 +613,218 @@
"type": "number",
"description": "Invenio record identifier (integer)."
},
+ "legacy_recid": {
+ "type": "number",
+ "description": "Legacy record identifier (integer). Kept for auditing reasons."
+ },
"original_source": {
"type": "string"
},
"_project_id": {
"type": "string"
+ },
+ "_curation": {
+ "properties": {
+ "legacy_report_number": {
+ "title": "Legacy record report number.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "department": {
+ "title": "CERN department.",
+ "type": "string"
+ },
+ "volumes": {
+ "title": "Volume list for this record.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "physical_location": {
+ "title": "Tag 852 physical location.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "physical_medium": {
+ "title": "Tag 340 physical medium.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "internal_note": {
+ "title": "Tag 595 internal note.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "digitized": {
+ "title": "Digitized metadata.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "string"
+ },
+ "format": {
+ "type": "string"
+ },
+ "link_text": {
+ "type": "string"
+ },
+ "public_note": {
+ "type": "string"
+ },
+ "nonpublic_note": {
+ "type": "string"
+ },
+ "md5_checksum": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "legacy_marc_fields": {
+ "type": "object",
+ "properties": {
+ "964": {
+ "title": "Tag 964.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "336": {
+ "title": "Tag 336.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "583": {
+ "title": "Tag 583.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "306": {
+ "title": "Tag 306.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+ "title": "Fields that need curation.",
+ "description": "This section contains MARC21 metadata fields that could not be mapped during weblectures migration.",
+ "type": "object"
+ },
+ "additional_titles": {
+ "description": "Additional record titles.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "title": {
+ "description": "Additional title of the record.",
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"]
+ },
+ "lang": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
+ }
+ }
+ },
+ "additional_descriptions": {
+ "description": "Additional descriptions for the record.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "description": {
+ "type": "string",
+ "description": "Descriptive content."
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "Abstract",
+ "Methods",
+ "Other",
+ "SeriesInformation",
+ "TableOfContents",
+ "TechnicalInfo"
+ ]
+ },
+ "lang": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
+ }
+ }
+ },
+ "related_identifiers": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "required": ["identifier", "scheme", "relation_type"],
+ "additionalProperties": false,
+ "properties": {
+ "identifier": {
+ "type": "string",
+ "description": "The actual identifier (e.g., URL or DOI)."
+ },
+ "scheme": {
+ "type": "string",
+ "enum": ["URL", "DOI", "CDS", "Indico"],
+ "description": "The scheme describing the identifier type."
+ },
+ "relation_type": {
+ "type": "string",
+ "enum": ["IsPartOf", "IsVariantFormOf"],
+ "description": "Describes the relationship with the current record."
+ },
+            "resource_type": {
+              "type": "string",
+              "enum": [
+                "Audiovisual",
+                "Book",
+                "Collection",
+                "ConferencePaper",
+                "DataPaper",
+                "Dataset",
+                "Event",
+                "Image",
+                "InteractiveResource",
+                "Model",
+                "PhysicalObject",
+                "Report",
+                "Service",
+                "Software",
+                "Sound",
+                "Text",
+                "Workflow",
+                "Other"
+              ],
+              "description": "Type of the related resource."
+            }
+ }
+ }
+ },
+ "collections": {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "additional_languages": {
+ "description": "Additional languages for the record.",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
}
}
}
\ No newline at end of file
diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/project.json b/cds/modules/deposit/static/json/cds_deposit/forms/project.json
index 254ffeb66..b80c100fe 100644
--- a/cds/modules/deposit/static/json/cds_deposit/forms/project.json
+++ b/cds/modules/deposit/static/json/cds_deposit/forms/project.json
@@ -160,6 +160,10 @@
"value": "Co-Producer",
"name": "Co-Producer"
},
+ {
+ "value": "ContactPerson",
+ "name": "Contact Person"
+ },
{
"value": "Creator",
"name": "Creator"
@@ -192,10 +196,18 @@
"value": "Producer",
"name": "Producer"
},
+ {
+ "value": "RelatedPerson",
+ "name": "Related Person"
+ },
{
"value": "Reporter",
"name": "Reporter"
},
+ {
+ "value": "ResearchGroup",
+ "name": "Research Group"
+ },
{
"value": "Screenwriter",
"name": "Screenwriter"
diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/video.json b/cds/modules/deposit/static/json/cds_deposit/forms/video.json
index c90b9884b..1d5448dd2 100644
--- a/cds/modules/deposit/static/json/cds_deposit/forms/video.json
+++ b/cds/modules/deposit/static/json/cds_deposit/forms/video.json
@@ -292,6 +292,10 @@
"value": "Co-Producer",
"name": "Co-Producer"
},
+ {
+ "value": "ContactPerson",
+ "name": "Contact Person"
+ },
{
"value": "Creator",
"name": "Creator"
@@ -324,10 +328,18 @@
"value": "Producer",
"name": "Producer"
},
+ {
+ "value": "RelatedPerson",
+ "name": "Related Person"
+ },
{
"value": "Reporter",
"name": "Reporter"
},
+ {
+ "value": "ResearchGroup",
+ "name": "Research Group"
+ },
{
"value": "Screenwriter",
"name": "Screenwriter"
@@ -555,17 +567,143 @@
],
"related_links": [
{
- "key": "related_links",
+ "key": "related_identifiers",
"type": "array",
- "add": "Add related links",
+ "add": "Add related identifiers",
+ "title": "Related Identifiers",
+ "description": "Add identifiers for related resources such as DOIs, URLs, or Indico event IDs.",
"items": [
{
- "title": "Name",
- "key": "related_links[].name"
+ "title": "Identifier",
+ "key": "related_identifiers[].identifier",
+ "required": true,
+ "placeholder": "e.g., 10.1234/example.doi, https://example.com, 12345",
+ "description": "The identifier value (DOI, URL, or Indico event ID)"
+ },
+ {
+ "title": "Scheme",
+ "key": "related_identifiers[].scheme",
+ "type": "select",
+ "required": true,
+ "placeholder": "Select identifier scheme",
+ "description": "The type of identifier scheme",
+ "titleMap": [
+ {
+ "value": "URL",
+ "name": "URL (Uniform Resource Locator)"
+ },
+ {
+ "value": "DOI",
+ "name": "DOI (Digital Object Identifier)"
+ },
+ {
+ "value": "Indico",
+ "name": "Indico (Event ID)"
+ },
+ {
+ "value": "CDS",
+ "name": "CDS (CERN Document Server Record ID)"
+ }
+ ]
+ },
+ {
+ "title": "Relation Type",
+ "key": "related_identifiers[].relation_type",
+ "type": "select",
+ "required": true,
+ "placeholder": "Select relation type",
+ "description": "How this resource relates to the identified resource",
+ "titleMap": [
+ {
+ "value": "IsPartOf",
+ "name": "Is part of"
+ },
+ {
+ "value": "IsVariantFormOf",
+ "name": "Is variant form of"
+ }
+ ]
},
{
- "title": "URL",
- "key": "related_links[].url"
+ "title": "Resource Type",
+ "key": "related_identifiers[].resource_type",
+ "type": "select",
+ "placeholder": "Select resource type (optional)",
+ "description": "The type of the related resource (optional)",
+ "titleMap": [
+ {
+ "value": "Audiovisual",
+ "name": "Audiovisual"
+ },
+ {
+ "value": "Book",
+ "name": "Book"
+ },
+ {
+ "value": "Collection",
+ "name": "Collection"
+ },
+ {
+ "value": "ConferencePaper",
+ "name": "Conference Paper"
+ },
+ {
+ "value": "DataPaper",
+ "name": "Data Paper"
+ },
+ {
+ "value": "Dataset",
+ "name": "Dataset"
+ },
+ {
+ "value": "Event",
+ "name": "Event"
+ },
+ {
+ "value": "Image",
+ "name": "Image"
+ },
+ {
+ "value": "InteractiveResource",
+ "name": "Interactive Resource"
+ },
+ {
+ "value": "Model",
+ "name": "Model"
+ },
+ {
+ "value": "PhysicalObject",
+ "name": "Physical Object"
+ },
+ {
+ "value": "Report",
+ "name": "Report"
+ },
+ {
+ "value": "Service",
+ "name": "Service"
+ },
+ {
+ "value": "Software",
+ "name": "Software"
+ },
+ {
+ "value": "Sound",
+ "name": "Sound"
+ },
+ {
+ "value": "Text",
+ "name": "Text"
+ },
+ {
+ "value": "Workflow",
+ "name": "Workflow"
+ },
+ {
+ "value": "Other",
+ "name": "Other"
+ }
+ ]
}
]
}
diff --git a/cds/modules/deposit/static/templates/cds_deposit/deposits.html b/cds/modules/deposit/static/templates/cds_deposit/deposits.html
index bb9c37b8a..1815ee009 100644
--- a/cds/modules/deposit/static/templates/cds_deposit/deposits.html
+++ b/cds/modules/deposit/static/templates/cds_deposit/deposits.html
@@ -39,6 +39,7 @@
Tips
Click here to select videos to upload
You can also Drag & Drop video files here
+ supported files {{ $ctrl.videoExtensions }}
diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html
index 3e3fe027c..bd606dfa7 100644
--- a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html
+++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html
@@ -206,7 +206,7 @@
- Related links
+ Related information
diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
index 872f8f518..829343572 100644
--- a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
+++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html
@@ -9,7 +9,7 @@
- Master & Subformats files
+ Main & Subformats files
@@ -224,12 +224,12 @@ Tips and suggestions
-
-
+
+
- Other files
+ Additional files
-
+
@@ -242,7 +242,7 @@
Tips and suggestions
-
+
| Filename |
Size |
@@ -271,6 +271,32 @@ Tips and suggestions
+
+
+
+
+
+
+
+
+
+
Upload complementary files for this video
+
Or Drag & Drop files
+
+
+
+
+
+
@@ -283,39 +309,43 @@ Tips and suggestions
-
-
-
-
-
-
-
-
-
Upload complimentary files for this video
+
+
+
+
+ Replace Video File
+
+
+
+
+
+
+
+
+
+
To replace the video file, just upload a video here.
+
Or Drag & Drop files
+
-
Or Drag & Drop files
-
-
-
Tips and suggestions
-
- - To replace the video file, just upload another video.
-
-
+
+
Tips and suggestions
- Click the Edit button on the top right corner to add more files.
-
+
\ No newline at end of file
diff --git a/cds/modules/deposit/views.py b/cds/modules/deposit/views.py
index 1d880ed65..da8b9a7c9 100644
--- a/cds/modules/deposit/views.py
+++ b/cds/modules/deposit/views.py
@@ -25,6 +25,7 @@
"""CDS interface."""
+from cds.modules.ldap.decorators import require_upload_permission
from flask import (
Blueprint,
abort,
@@ -118,6 +119,7 @@ def to_links_js(pid, deposit=None, dep_type=None):
@blueprint.route("/deposit/reportnumbers/new", methods=["GET", "POST"])
@login_required
+@require_upload_permission()
def reserve_report_number():
"""Form to reserver a new report number."""
if not has_read_record_eos_path_permission(current_user, None):
@@ -156,6 +158,7 @@ def reserve_report_number():
"/deposit/reportnumbers/assign/
", methods=["GET", "POST"]
)
@login_required
+@require_upload_permission()
def assign_report_number(depid):
"""Form to reserver a new report number."""
if not has_read_record_eos_path_permission(current_user, None):
diff --git a/cds/modules/fixtures/data/categories.json b/cds/modules/fixtures/data/categories.json
index 015ea1951..1b100c679 100644
--- a/cds/modules/fixtures/data/categories.json
+++ b/cds/modules/fixtures/data/categories.json
@@ -95,5 +95,18 @@
"_access": {
"read": ["alice-secretariat@cern.ch"]
}
+ },
+ {
+ "name": "LECTURES",
+ "types": ["VIDEO"],
+ "access": {
+ "public": true,
+ "restricted": [],
+ "responsible": ["weblecture-service@cern.ch"]
+ },
+ "_record_type": ["PROJECT"],
+ "_access": {
+ "read": ["weblecture-service@cern.ch"]
+ }
}
]
diff --git a/cds/modules/fixtures/data/pages/guides/search.html b/cds/modules/fixtures/data/pages/guides/search.html
index af221fe89..2bc11082d 100644
--- a/cds/modules/fixtures/data/pages/guides/search.html
+++ b/cds/modules/fixtures/data/pages/guides/search.html
@@ -371,6 +371,8 @@ Contributor roles (CV)
Comments by
+Contact Person
+
Co-Producer
Creator
@@ -389,8 +391,12 @@ Contributor roles (CV)
Producer
+Related Person
+
Reporter
+Research Group
+
Screenwriter
Speaker
diff --git a/cds/modules/flows/api.py b/cds/modules/flows/api.py
index 136bef012..09acbaadf 100644
--- a/cds/modules/flows/api.py
+++ b/cds/modules/flows/api.py
@@ -39,6 +39,7 @@
from .tasks import (
CeleryTask,
DownloadTask,
+ ExtractChapterFramesTask,
ExtractFramesTask,
ExtractMetadataTask,
TranscodeVideoTask,
@@ -245,6 +246,7 @@ def _find_celery_task_by_name(name):
ExtractMetadataTask,
ExtractFramesTask,
TranscodeVideoTask,
+ ExtractChapterFramesTask,
]:
if celery_task.name == name:
return celery_task
diff --git a/cds/modules/flows/tasks.py b/cds/modules/flows/tasks.py
index eaa97952a..a3c2009ea 100644
--- a/cds/modules/flows/tasks.py
+++ b/cds/modules/flows/tasks.py
@@ -28,6 +28,7 @@
import shutil
import signal
import tempfile
+from io import BytesIO
import jsonpatch
import requests
@@ -57,12 +58,12 @@
from cds.modules.flows.models import FlowTaskMetadata
from cds.modules.flows.models import FlowTaskStatus as FlowTaskStatus
-
+from cds.modules.records.api import CDSVideosFilesIterator
from ..ffmpeg import ff_frames, ff_probe_all
from ..opencast.api import OpenCast
from ..opencast.error import RequestError
from ..opencast.utils import get_qualities
-from ..records.utils import to_string
+from ..records.utils import to_string, parse_video_chapters, get_existing_chapter_frame_timestamps
from ..xrootd.utils import file_opener_xrootd
from .deposit import index_deposit_project
from .files import dispose_object_version, move_file_into_local
@@ -197,7 +198,9 @@ def _meta_exception_envelope(self, exc):
NOTE: workaround to be able to save the payload in celery in case of
exceptions.
"""
- meta = dict(message=str(exc), payload=self._base_payload)
+ # Safety check in case base payload is not set yet
+ payload = getattr(self, '_base_payload', {})
+ meta = dict(message=str(exc), payload=payload)
return dict(exc_message=meta, exc_type=exc.__class__.__name__)
def on_failure(self, exc, task_id, args, kwargs, einfo):
@@ -223,7 +226,16 @@ def on_success(self, exc, task_id, args, kwargs):
def _reindex_video_project(self):
"""Reindex video and project."""
with celery_app.flask_app.app_context():
- deposit_id = self._base_payload["deposit_id"]
+ # Safety check in case base payload is not set yet
+ if not hasattr(self, '_base_payload') or not self._base_payload or 'deposit_id' not in self._base_payload:
+ if hasattr(self, 'deposit_id') and self.deposit_id:
+ deposit_id = self.deposit_id
+ else:
+ self.log("Cannot reindex: deposit_id not available")
+ return
+ else:
+ deposit_id = self._base_payload["deposit_id"]
+
try:
index_deposit_project(deposit_id)
except PIDDeletedError:
@@ -590,10 +602,10 @@ def progress_updater(current_frame):
object_=self.object_version,
output_dir=output_folder,
progress_updater=progress_updater,
- **options
+ **options,
),
object_=self.object_version,
- **options
+ **options,
)
except Exception:
db.session.rollback()
@@ -601,6 +613,8 @@ def progress_updater(current_frame):
self.clean(version_id=self.object_version_id)
raise
+ total_frames = len(frames)
+
# Generate GIF images
self._create_gif(
bucket=str(self.object_version.bucket.id),
@@ -618,7 +632,7 @@ def progress_updater(current_frame):
db.session.commit()
self.log("Finished task {0}".format(kwargs["task_id"]))
- return "Created {0} frames.".format(len(frames))
+ return "Created {0} frames.".format(total_frames)
@classmethod
def _time_position(cls, duration, frames_start=5, frames_end=95, frames_gap=10):
@@ -648,7 +662,7 @@ def _create_tmp_frames(
duration,
output_dir,
progress_updater=None,
- **kwargs
+ **kwargs,
):
"""Create frames in temporary files."""
# Generate frames
@@ -727,6 +741,276 @@ def _create_object(
[ObjectVersionTag.create(obj, k, to_string(tags[k])) for k in tags]
+class ExtractChapterFramesTask(AVCTask):
+ """Extract chapter frames task - dedicated task for chapter frame extraction only."""
+
+ name = "file_video_extract_chapter_frames"
+
+    @staticmethod
+    def clean(version_id, valid_chapter_seconds=None, *args, **kwargs):
+        """Delete generated chapter frame ObjectVersion slaves.
+
+        Chapter frames are the objects carrying all three tags
+        ``master == version_id``, ``context_type == "frame"`` and
+        ``is_chapter_frame == "true"``.
+
+        - If valid_chapter_seconds is given, frames whose ``timestamp`` tag
+          value is in that list are kept; every other chapter frame is
+          passed to ``dispose_object_version``.
+        - If not (``None`` or empty), all chapter frames are removed and every
+          version of ``chapters.vtt`` in the master's bucket is disposed too.
+
+        :param version_id: version id of the master ObjectVersion.
+        :param valid_chapter_seconds: ``timestamp`` tag values to keep, in
+            the same string form in which they are stored on the tags.
+        """
+        valid_chapter_seconds = valid_chapter_seconds or []
+        # One alias per tag condition, so that all three tags are required
+        # on the same ObjectVersion.
+        tag_alias_1 = aliased(ObjectVersionTag)
+        tag_alias_2 = aliased(ObjectVersionTag)
+        tag_alias_3 = aliased(ObjectVersionTag)
+
+        slaves = (
+            ObjectVersion.query.join(tag_alias_1, ObjectVersion.tags)
+            .join(tag_alias_2, ObjectVersion.tags)
+            .join(tag_alias_3, ObjectVersion.tags)
+            .filter(tag_alias_1.key == "master", tag_alias_1.value == version_id)
+            .filter(tag_alias_2.key == "context_type", tag_alias_2.value == "frame")
+            .filter(tag_alias_3.key == "is_chapter_frame", tag_alias_3.value == "true")
+            .all()
+        )
+
+        for slave in slaves:
+            # A chapter frame without a ``timestamp`` tag cannot be matched to
+            # any chapter: fall back to None so it is disposed as stale rather
+            # than aborting the whole cleanup with StopIteration.
+            ts_val = next(
+                (t.value for t in slave.tags if t.key == "timestamp"), None
+            )
+            if ts_val in valid_chapter_seconds:
+                continue
+            dispose_object_version(slave)
+
+        # If no valid chapter seconds, remove the chapters.vtt file
+        if not valid_chapter_seconds:
+            master_obj = ObjectVersion.query.get(version_id)
+            if master_obj is None:
+                # No master object, hence no bucket to look chapters.vtt up in.
+                return
+            vtt_objs = ObjectVersion.get_versions(master_obj.bucket_id, "chapters.vtt")
+            for vtt_obj in vtt_objs:
+                dispose_object_version(vtt_obj)
+
+ def run(self, *args, **kwargs):
+ """Extract frames only at chapter timestamps from video description.
+
+ This task is specifically designed to extract frames for chapters only,
+ without affecting other frame extraction processes.
+
+ The task receives parameters through the standard AVCTask initialization:
+ - self.deposit_id: The deposit ID containing the video description
+ - self.object_version: The ObjectVersion of the master video file
+ - self.flow_id: The current flow ID for task metadata integration
+
+ Steps: mark the flow task as STARTED, parse the chapters out of the
+ deposit description, create the chapter frames that do not exist yet,
+ remove chapter frames whose timestamp is no longer a valid chapter,
+ write chapters.vtt and sync the record files with the deposit files.
+
+ Returns the string "Created N chapter frames.", where N counts only the
+ frames extracted by this run.
+
+ Raises ValueError when the duration read from the payload tags is 0.
+ Any exception raised inside the try block is re-raised after a session
+ rollback, removal of the temporary folder and a clean() call without
+ valid timestamps (i.e. all chapter frames and chapters.vtt are removed).
+ """
+
+ # Create or update the TaskMetadata
+ flow_task_metadata = self.get_or_create_flow_task()
+ kwargs["celery_task_id"] = str(self.request.id)
+ kwargs["task_id"] = str(flow_task_metadata.id)
+ flow_task_metadata.payload = self.get_full_payload(**kwargs)
+ flow_task_metadata.status = FlowTaskStatus.STARTED
+ flow_task_metadata.message = ""
+ db.session.commit()
+
+ self.log("Started task {0}".format(kwargs["task_id"]))
+
+ output_folder = tempfile.mkdtemp()
+
+ # Remove temporary directory on abrupt execution halts.
+ self.set_revoke_handler(
+ lambda: shutil.rmtree(output_folder, ignore_errors=True)
+ )
+
+ # `chapters` is only assigned inside the try block below; this closure is
+ # handed to _create_chapter_frames, which is called after that assignment.
+ def progress_updater(current_chapter):
+ """Progress reporter."""
+ percentage = current_chapter / len(chapters) * 100
+ # Only meta["message"] is used (logged); the payload part is built but
+ # not stored anywhere in this method.
+ meta = dict(
+ payload=dict(size=len(chapters), percentage=percentage),
+ message="Extracting chapter frames [{0} out of {1}]".format(
+ current_chapter, len(chapters)
+ ),
+ )
+ self.log(meta["message"])
+
+ bucket_was_locked = False
+ if self.object_version.bucket.locked:
+ # If record was published we need to unlock the bucket
+ bucket_was_locked = True
+ self.object_version.bucket.locked = False
+
+ try:
+ # Get the deposit to access the description
+ from cds.modules.deposit.api import deposit_video_resolver
+ db.session.refresh(self.object_version)
+ deposit_video = deposit_video_resolver(self.deposit_id)
+ description = deposit_video.get("description", "")
+
+ self.log("Found description with {0} characters".format(len(description)))
+
+ # Parse chapters from description
+ chapters = parse_video_chapters(description)
+
+ self.log("Found {0} chapters in description".format(len(chapters)))
+
+ # Get video duration from metadata
+ # (the "duration" entry of the payload "tags"; 0 when it is missing)
+ duration = float(self._base_payload.get("tags", {}).get("duration", 0))
+
+ if duration == 0:
+ raise ValueError("Video duration is 0 - cannot extract frames")
+
+ # Check which timestamps already have frames
+ existing_timestamps = get_existing_chapter_frame_timestamps(deposit_video)
+
+ frames, chapter_seconds = self._create_chapter_frames(
+ chapters=chapters,
+ duration=duration,
+ object_=self.object_version,
+ output_dir=output_folder,
+ existing_timestamps=existing_timestamps,
+ progress_updater=progress_updater,
+ )
+
+ # Clean unused chapters
+ # (keeps the frames whose timestamp tag is listed in chapter_seconds)
+ self.clean(version_id=self.object_version_id, valid_chapter_seconds=chapter_seconds)
+
+ # Create or update WebVTT file for chapters
+ self._build_chapter_vtt(chapters, duration)
+
+ # Sync deposit and record files
+ sync_records_with_deposit_files(self.deposit_id)
+
+ except Exception:
+ # NOTE(review): clean() without valid timestamps removes every chapter
+ # frame and chapters.vtt, including those from earlier successful runs;
+ # confirm this is intended for transient failures.
+ db.session.rollback()
+ shutil.rmtree(output_folder, ignore_errors=True)
+ self.clean(version_id=self.object_version_id)
+ raise
+
+ total_frames = len(frames)
+
+ if bucket_was_locked:
+ # Lock the bucket again
+ # NOTE(review): no db.session.commit() follows in this method;
+ # presumably the caller / success hook commits - confirm.
+ self.object_version.bucket.locked = True
+
+ # Cleanup
+ shutil.rmtree(output_folder)
+
+ self.log("Finished task {0}".format(kwargs["task_id"]))
+ return "Created {0} chapter frames.".format(total_frames)
+
+    @classmethod
+    def _create_chapter_frames(
+        cls,
+        chapters,
+        duration,
+        object_,
+        output_dir,
+        existing_timestamps,
+        progress_updater=None,
+    ):
+        """Create frames for chapters that don't already exist at those timestamps.
+
+        :param chapters: iterable of dicts; ``chapter["seconds"]`` is read.
+        :param duration: video duration in seconds; chapters starting after
+            it are skipped.
+        :param object_: master ObjectVersion; its file is made available
+            locally and the frames are created in its bucket.
+        :param output_dir: directory receiving the extracted
+            ``chapter-<second>.jpg`` files.
+        :param existing_timestamps: numeric timestamps that already have a
+            chapter frame; a chapter within 0.1s of one is not re-extracted.
+        :param progress_updater: optional callable, called with the 1-based
+            index of the chapter being processed.
+        :returns: ``(created_frames, valid_chapter_seconds)``: the paths of
+            the frame files extracted by this call, and the stringified seek
+            timestamp of every in-range chapter (also those whose frame
+            already existed).
+        """
+        created_frames = []
+        valid_chapter_seconds = []
+
+        with move_file_into_local(object_, delete=True) as url:
+            for current_chapter, chapter in enumerate(chapters, start=1):
+                if progress_updater:
+                    progress_updater(current_chapter)
+
+                source_seconds = chapter["seconds"]
+
+                # Skip chapters that are beyond video duration
+                if source_seconds > duration:
+                    continue
+
+                # Seek position. For 0:00 chapters, use a small offset to
+                # avoid extraction issues; near the end, pull the position
+                # back so the 0.01s extraction window stays inside the video.
+                # This is resolved *before* the timestamp is recorded: the
+                # value kept in valid_chapter_seconds must equal the
+                # ``timestamp`` tag written on the frame, otherwise clean()
+                # disposes the frame that was just created.
+                chapter_seconds = 0.1 if source_seconds == 0 else source_seconds
+                if chapter_seconds + 0.01 >= duration:
+                    chapter_seconds = max(0, duration - 0.02)
+                valid_chapter_seconds.append(to_string(chapter_seconds))
+
+                # Skip if frame already exists at this timestamp (with some tolerance)
+                timestamp_exists = any(
+                    abs(existing_ts - chapter_seconds) < 0.1
+                    for existing_ts in existing_timestamps
+                )
+                if timestamp_exists:
+                    continue
+
+                # The file name is derived from the chapter's own second, not
+                # from the (possibly pulled back) seek position.
+                frame_filename = "chapter-{0}.jpg".format(int(source_seconds))
+                frame_path = os.path.join(output_dir, frame_filename)
+
+                try:
+                    # Extract single frame at chapter timestamp using ff_frames
+                    ff_frames(
+                        input_file=url,
+                        start=chapter_seconds,
+                        end=chapter_seconds + 0.01,  # Extract just one frame
+                        step=1,
+                        duration=duration,
+                        output=frame_path,
+                    )
+
+                    if os.path.exists(frame_path) and os.path.getsize(frame_path) > 0:
+                        # Create ObjectVersion for chapter frame (as normal frame)
+                        ExtractFramesTask._create_object(
+                            bucket=object_.bucket,
+                            key=frame_filename,
+                            stream=file_opener_xrootd(frame_path, "rb"),
+                            size=os.path.getsize(frame_path),
+                            media_type="image",
+                            context_type="frame",
+                            master_id=object_.version_id,
+                            is_chapter_frame=True,
+                            timestamp=chapter_seconds,
+                        )
+
+                        created_frames.append(frame_path)
+
+                except Exception as e:
+                    # Log error but continue with other chapters
+                    current_app.logger.error(
+                        "Failed to extract frame for chapter at {0}s: {1}".format(
+                            chapter_seconds, str(e)
+                        )
+                    )
+                    continue
+
+        return created_frames, valid_chapter_seconds
+
+    def _build_chapter_vtt(self, chapters, duration):
+        """Create a ``chapters.vtt`` object in the bucket of ``self.object_version``.
+
+        One WebVTT cue is written per chapter, in chronological order: a cue
+        starts at the chapter's ``seconds`` and ends where the next chapter
+        starts (the last cue ends at ``duration``). Cue ends are capped at
+        ``duration``. Nothing is created when ``chapters`` is empty.
+
+        :param chapters: list of dicts with ``seconds`` and ``title`` keys.
+        :param duration: video duration in seconds.
+        """
+        if not chapters:
+            return
+
+        def _as_cue_time(seconds):
+            """Format seconds as ``HH:MM:SS.000`` (sub-second part dropped)."""
+            return "{:02}:{:02}:{:02}.000".format(
+                int(seconds // 3600),
+                int((seconds % 3600) // 60),
+                int(seconds % 60),
+            )
+
+        # Sort once and look the next start up in the *sorted* list: indexing
+        # the unsorted input gives wrong end times whenever the chapters are
+        # not listed chronologically.
+        ordered = sorted(chapters, key=lambda x: x["seconds"])
+        vtt = "WEBVTT\n\n"
+        for i, c in enumerate(ordered):
+            start = c["seconds"]
+            end = ordered[i + 1]["seconds"] if i + 1 < len(ordered) else duration
+            # A cue must not run past the end of the video.
+            end = min(end, duration)
+            start_str = _as_cue_time(start)
+            end_str = _as_cue_time(end)
+            vtt += f"{i+1}\n{start_str} --> {end_str}\n{c['title']}\n\n"
+
+        vtt_bytes = vtt.encode("utf-8")
+        vtt_key = "chapters.vtt"
+
+        bucket = as_bucket(self.object_version.bucket.id)
+        obj = ObjectVersion.create(
+            bucket=bucket,
+            key=vtt_key,
+            stream=BytesIO(vtt_bytes),
+            size=len(vtt_bytes),
+        )
+        ObjectVersionTag.create(obj, "media_type", "chapters")
+        ObjectVersionTag.create(obj, "context_type", "chapters")
+        ObjectVersionTag.create(obj, "content_type", "vtt")
+        self.log("Created chapters.vtt")
+
+
class TranscodeVideoTask(AVCTask):
"""Transcode video task.
@@ -793,7 +1077,7 @@ def _update_flow_tasks(self, flow_tasks, status, message, **kwargs):
opencast_publication_tag=current_app.config["CDS_OPENCAST_QUALITIES"][
quality
]["opencast_publication_tag"],
- **kwargs # may contain `opencast_event_id`
+ **kwargs, # may contain `opencast_event_id`
)
# JSONb cols needs to be assigned (not updated) to be persisted
flow_task_metadata.payload = new_payload
@@ -848,7 +1132,7 @@ def _start_transcodable_flow_tasks_or_cancel(self, wanted_qualities=None):
new_payload.update(
task_id=str(t.id),
celery_task_id=str(self.request.id),
- **self._base_payload
+ **self._base_payload,
)
# JSONb cols needs to be assigned (not updated) to be persisted
t.payload = new_payload
diff --git a/cds/modules/home/templates/cds_home/home.html b/cds/modules/home/templates/cds_home/home.html
index 8be685422..69f4382fc 100644
--- a/cds/modules/home/templates/cds_home/home.html
+++ b/cds/modules/home/templates/cds_home/home.html
@@ -78,8 +78,53 @@ {{ _('TRENDS') }}
+
+ {% for section in config.HOMEPAGE_DYNAMIC_QUERIES %}
+
+
+
+
+ {% if section.subcategories %}
+
+ {% endif %}
+
+
+
+
+
+
+
+
+
+
+ {% endfor %}
+
+
+
-
+
diff --git a/cds/modules/home/views.py b/cds/modules/home/views.py
index 9a70122db..968d341f0 100644
--- a/cds/modules/home/views.py
+++ b/cds/modules/home/views.py
@@ -25,6 +25,8 @@
from invenio_cache.decorators import cached_unless_authenticated
from invenio_i18n import lazy_gettext as _
+from ..records.permissions import has_upload_permission
+
blueprint = Blueprint(
"cds_home",
__name__,
@@ -58,4 +60,5 @@ def init_menu(app):
"invenio_deposit_ui.index",
_("Upload"),
order=2,
+ visible_when=lambda: has_upload_permission()
)
diff --git a/cds/modules/invenio_deposit/utils.py b/cds/modules/invenio_deposit/utils.py
index 71b22e05d..1db2d374d 100644
--- a/cds/modules/invenio_deposit/utils.py
+++ b/cds/modules/invenio_deposit/utils.py
@@ -28,6 +28,7 @@
from flask import request
from invenio_oauth2server import require_api_auth, require_oauth_scopes
+from cds.modules.ldap.decorators import require_upload_permission
from .scopes import write_scope
@@ -84,6 +85,7 @@ def check_oauth2_scope(can_method, *myscopes):
def check(record, *args, **kwargs):
@require_api_auth()
+ @require_upload_permission()
@require_oauth_scopes(*myscopes)
def can(self):
return can_method(record)
diff --git a/cds/modules/invenio_deposit/views/ui.py b/cds/modules/invenio_deposit/views/ui.py
index 2d04b33d7..062202326 100644
--- a/cds/modules/invenio_deposit/views/ui.py
+++ b/cds/modules/invenio_deposit/views/ui.py
@@ -27,6 +27,7 @@
from copy import deepcopy
+from cds.modules.ldap.decorators import require_upload_permission
from flask import Blueprint, current_app, render_template, request
from flask_login import login_required
from invenio_pidstore.errors import PIDDeletedError
@@ -73,12 +74,14 @@ def tombstone_errorhandler(error):
@blueprint.route("/deposit")
@login_required
+ @require_upload_permission()
def index():
"""List user deposits."""
return render_template(current_app.config["DEPOSIT_UI_INDEX_TEMPLATE"])
@blueprint.route("/deposit/new")
@login_required
+ @require_upload_permission()
def new():
"""Create new deposit."""
deposit_type = request.values.get("type")
diff --git a/cds/modules/ldap/decorators.py b/cds/modules/ldap/decorators.py
index d2056e252..28e15d556 100644
--- a/cds/modules/ldap/decorators.py
+++ b/cds/modules/ldap/decorators.py
@@ -33,3 +33,20 @@ def decorated_api_view(*args, **kwargs):
abort(401)
return func(*args, **kwargs)
return decorated_api_view
+
+
+def require_upload_permission():
+    """Build a view decorator that only lets users with upload permission in.
+
+    The decorated view aborts with 401 when ``current_user`` is not
+    authenticated and with 403 when ``has_upload_permission()`` is falsy;
+    otherwise the original view is called with its arguments untouched.
+    """
+    def guard(view):
+        # Local import, resolved when a view gets decorated.
+        from cds.modules.records.permissions import has_upload_permission
+
+        @wraps(view)
+        def guarded_view(*args, **kwargs):
+            authenticated = current_user.is_authenticated
+            if not authenticated:
+                abort(401)
+
+            allowed = has_upload_permission()
+            if not allowed:
+                abort(403)
+
+            return view(*args, **kwargs)
+
+        return guarded_view
+
+    return guard
diff --git a/cds/modules/legacy/__init__.py b/cds/modules/legacy/__init__.py
new file mode 100644
index 000000000..33eb1ff2e
--- /dev/null
+++ b/cds/modules/legacy/__init__.py
@@ -0,0 +1,25 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of CDS.
+# Copyright (C) 2025 CERN.
+#
+# CDS is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CDS is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CDS; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+"""CDS-Videos legacy migration module."""
diff --git a/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py
new file mode 100644
index 000000000..e325af166
--- /dev/null
+++ b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py
@@ -0,0 +1,28 @@
+#
+# This file is part of Invenio.
+# Copyright (C) 2025 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Create legacy branch"""
+
+from alembic import op
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = 'bf9c38b8dabd'
+down_revision = None
+branch_labels = ("legacy",)
+depends_on = '35c1075e6360'
+
+
+def upgrade():
+ """Upgrade database."""
+ pass
+
+
+def downgrade():
+ """Downgrade database."""
+ pass
diff --git a/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py
new file mode 100644
index 000000000..f0e84db46
--- /dev/null
+++ b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py
@@ -0,0 +1,39 @@
+#
+# This file is part of Invenio.
+# Copyright (C) 2025 CERN.
+#
+# Invenio is free software; you can redistribute it and/or modify it
+# under the terms of the MIT License; see LICENSE file for more details.
+
+"""Create legacy table."""
+
+from alembic import op
+import sqlalchemy as sa
+import sqlalchemy_utils
+from sqlalchemy.dialects import postgresql
+
+# revision identifiers, used by Alembic.
+revision = 'f57e61d833b1'
+down_revision = 'bf9c38b8dabd'
+branch_labels = ()
+depends_on = None
+
+
+def upgrade():
+ """Upgrade database."""
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.create_table('cds_migration_legacy_records',
+ sa.Column('id', sqlalchemy_utils.types.uuid.UUIDType(), nullable=False),
+ sa.Column('migrated_record_object_uuid', sqlalchemy_utils.types.uuid.UUIDType(), nullable=True, comment='The uuid of the record metadata of the latest record metadata at the time of the migration.'),
+ sa.Column('legacy_recid', sa.Integer(), nullable=True, comment='The record id in the legacy system'),
+ sa.Column('json', sa.JSON().with_variant(postgresql.JSONB(none_as_null=True, astext_type=sa.Text()), 'postgresql'), nullable=True, comment='The extracted information of the legacy record before any transformation.'),
+ sa.PrimaryKeyConstraint('id', name=op.f('pk_cds_migration_legacy_records'))
+ )
+ # ### end Alembic commands ###
+
+
+def downgrade():
+ """Downgrade database."""
+ # ### commands auto generated by Alembic - please adjust! ###
+ op.drop_table('cds_migration_legacy_records')
+ # ### end Alembic commands ###
diff --git a/cds/modules/legacy/minters.py b/cds/modules/legacy/minters.py
new file mode 100644
index 000000000..46103a66f
--- /dev/null
+++ b/cds/modules/legacy/minters.py
@@ -0,0 +1,38 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of CDS.
+# Copyright (C) 2025 CERN.
+#
+# CDS is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CDS is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CDS; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+"""Minters."""
+
+from invenio_pidstore.models import PersistentIdentifier, PIDStatus
+
+
+def legacy_recid_minter(legacy_recid, uuid):
+ """Legacy_recid minter."""
+ PersistentIdentifier.create(
+ pid_type="lrecid",
+ pid_value=legacy_recid,
+ object_type="rec",
+ object_uuid=uuid,
+ status=PIDStatus.REGISTERED,
+ )
diff --git a/cds/modules/legacy/models.py b/cds/modules/legacy/models.py
new file mode 100644
index 000000000..a16caa57b
--- /dev/null
+++ b/cds/modules/legacy/models.py
@@ -0,0 +1,66 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of CDS.
+# Copyright (C) 2025 CERN.
+#
+# CDS is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CDS is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CDS; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+"""CDS Migration models."""
+
+import json
+import uuid
+
+from invenio_db import db
+from sqlalchemy import Column, Integer, String
+from sqlalchemy.dialects import postgresql
+from sqlalchemy_utils.types import UUIDType
+
+
+class CDSMigrationLegacyRecord(db.Model):
+ """Store the extracted legacy information for a specific record."""
+
+ __tablename__ = "cds_migration_legacy_records"
+
+ id = db.Column(
+ UUIDType,
+ primary_key=True,
+ default=uuid.uuid4,
+ )
+ migrated_record_object_uuid = Column(
+ UUIDType,
+ nullable=True,
+ comment="The uuid of the migrated record metadata.",
+ )
+ legacy_recid = Column(
+ Integer, nullable=True, comment="The record id in the legacy system"
+ )
+ json = db.Column(
+ db.JSON().with_variant(
+ postgresql.JSONB(none_as_null=True),
+ "postgresql",
+ ),
+ default=lambda: dict(),
+ nullable=True,
+ comment="The extracted information of the legacy record before any transformation.",
+ )
+
+ def __repr__(self):
+ """Representation of the model."""
+ return f"
"
diff --git a/cds/modules/legacy/redirector.py b/cds/modules/legacy/redirector.py
new file mode 100644
index 000000000..a6a2c6e22
--- /dev/null
+++ b/cds/modules/legacy/redirector.py
@@ -0,0 +1,47 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of CDS.
+# Copyright (C) 2025 CERN.
+#
+# CDS is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CDS is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CDS; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+"""Redirector functions and rules."""
+
+from flask import Blueprint, current_app, abort, redirect
+from sqlalchemy.orm.exc import NoResultFound
+
+from .resolver import get_pid_by_legacy_recid
+
+HTTP_MOVED_PERMANENTLY = 301
+
+blueprint = Blueprint(
+ "cds_legacy", __name__, template_folder="templates", url_prefix="/legacy"
+)
+
+@blueprint.route("/record/", strict_slashes=False)
+def legacy_record_redirect(legacy_id):
+ """Redirect legacy recid."""
+ try:
+ pid = get_pid_by_legacy_recid(legacy_id)
+ except NoResultFound:
+ abort(404)
+
+ url_path = f"{current_app.config['SITE_URL']}/record/{pid.pid_value}"
+ return redirect(url_path, HTTP_MOVED_PERMANENTLY)
\ No newline at end of file
diff --git a/cds/modules/legacy/resolver.py b/cds/modules/legacy/resolver.py
new file mode 100644
index 000000000..eb2c6a1c2
--- /dev/null
+++ b/cds/modules/legacy/resolver.py
@@ -0,0 +1,43 @@
+# -*- coding: utf-8 -*-
+#
+# This file is part of CDS.
+# Copyright (C) 2025 CERN.
+#
+# CDS is free software; you can redistribute it
+# and/or modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation; either version 2 of the
+# License, or (at your option) any later version.
+#
+# CDS is distributed in the hope that it will be
+# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with CDS; if not, write to the
+# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston,
+# MA 02111-1307, USA.
+#
+# In applying this license, CERN does not
+# waive the privileges and immunities granted to it by virtue of its status
+# as an Intergovernmental Organization or submit itself to any jurisdiction.
+
+"""Resolver."""
+
+from invenio_pidstore.models import PersistentIdentifier
+
+
+def get_pid_by_legacy_recid(legacy_recid):
+ """Get record by pid value."""
+ # Get the object uuid from pidstore
+ recid = PersistentIdentifier.query.filter_by(
+ pid_value=legacy_recid, object_type="rec", pid_type="lrecid"
+ ).one()
+
+ # Use the object uuid to get the pid value
+ record_pid = PersistentIdentifier.query.filter_by(
+ object_uuid=recid.object_uuid, object_type="rec", pid_type="recid"
+ ).one()
+
+ return record_pid
+
diff --git a/cds/modules/oauthclient/cern_openid.py b/cds/modules/oauthclient/cern_openid.py
index ae14b7d85..56af1ab73 100644
--- a/cds/modules/oauthclient/cern_openid.py
+++ b/cds/modules/oauthclient/cern_openid.py
@@ -85,8 +85,10 @@ def find_remote_by_client_id(client_id):
def fetch_extra_data(resource):
"""Return a dict with extra data retrieved from CERN OAuth."""
- person_id = resource.get("cern_person_id")
- return dict(person_id=person_id, groups=resource["groups"])
+ data = {"groups": resource.get("groups", [])}
+ if resource.get("cern_person_id"):
+ data["person_id"] = resource["cern_person_id"]
+ return data
def account_roles_and_extra_data(account, resource, refresh_timedelta=None):
@@ -178,10 +180,19 @@ def _account_info(remote, resp):
resp,
)
- email = resource["email"]
- external_id = str(resource["cern_uid"])
- nice = resource["preferred_username"]
- name = resource["name"]
+ email = resource.get("email")
+ if not email:
+ raise OAuthCERNRejectedAccountError("No email in userinfo", remote, resp)
+
+ external_id = resource.get("cern_uid") or resource.get("sub")
+ if not external_id:
+ raise OAuthCERNRejectedAccountError("No external_id in userinfo", remote, resp)
+ external_id = str(external_id)
+ raw_username = resource.get("preferred_username") or email
+ if "@" in raw_username:
+ raw_username = raw_username.replace("@", "_").replace(".", "_")
+ nice = raw_username
+ name = resource.get("name") or nice
return dict(
user=dict(email=email.lower(), profile=dict(username=nice, full_name=name)),
@@ -231,7 +242,7 @@ def account_setup(remote, token, resp):
resource = get_resource(remote, resp)
with db.session.begin_nested():
- external_id = resource.get("cern_uid")
+ external_id = resource.get("cern_uid") or resource.get("sub")
# Set CERN person ID in extra_data.
token.remote_account.extra_data = {"external_id": external_id}
diff --git a/cds/modules/previewer/api.py b/cds/modules/previewer/api.py
index d226f2b7e..8c94f3b4b 100644
--- a/cds/modules/previewer/api.py
+++ b/cds/modules/previewer/api.py
@@ -128,6 +128,18 @@ def vr(self):
"""Get video's VR flag."""
return self.record.get("vr")
+ @property
+ def chapters_uri(self):
+ """Get the chapters.vtt file link if available."""
+ try:
+ return [
+ f["links"]["self"]
+ for f in self.record["_files"]
+ if f.get("context_type") == "chapters" and f.get("content_type") == "vtt"
+ ][0]
+ except IndexError:
+ return None
+
class CDSPreviewDepositFile(PreviewFile):
"""Preview deposit files implementation."""
diff --git a/cds/modules/previewer/extensions/video.py b/cds/modules/previewer/extensions/video.py
index 52ef2c31f..6bbc754e7 100644
--- a/cds/modules/previewer/extensions/video.py
+++ b/cds/modules/previewer/extensions/video.py
@@ -25,6 +25,7 @@
"""Previews video files."""
+from cds.modules.records.utils import parse_video_chapters
from flask import render_template
@@ -63,7 +64,7 @@ def preview(self, file, embed_config=None):
if "report_number" in record and len(record["report_number"])
else ""
)
-
+
return render_template(
self.template,
file=file,
diff --git a/cds/modules/previewer/templates/cds_previewer/macros/player.html b/cds/modules/previewer/templates/cds_previewer/macros/player.html
index 9ec2ac671..bf45ca7fe 100644
--- a/cds/modules/previewer/templates/cds_previewer/macros/player.html
+++ b/cds/modules/previewer/templates/cds_previewer/macros/player.html
@@ -51,45 +51,72 @@
{% endif %}
initialRendition: 'first'
});
- // Preload
- player.source = {
- sources: [
- {
- {% if video_source %}
- src: "{{ video_source }}",
- type: 'application/x-mpegURL'
- {% elif obj.m3u8_uri and obj.subformats|length > 0 %}
- src: '{{ obj.m3u8_uri }}',
- type: 'application/x-mpegURL'
- {% else %}
- src: '{{ obj.uri }}',
- type: 'video/mp4'
- {% endif %}
- },
- ],
+
+ window.top.player = player;
+
+ // --- helpers ---
+ function durationToSeconds(durationStr) {
+ if (!durationStr) return null;
+ const parts = durationStr.split(':').map(Number); // [HH, MM, SS] or [MM, SS]
+ if (parts.length === 3) {
+ return parts[0] * 3600 + parts[1] * 60 + parts[2];
+ }
+ if (parts.length === 2) {
+ return parts[0] * 60 + parts[1];
+ }
+ return null;
+ }
+
+ // Preload
+ player.source = {
+ sources: [
+ {
+ {% if video_source %}
+ src: "{{ video_source }}",
+ type: 'application/x-mpegURL'
+ {% elif obj.m3u8_uri and obj.subformats|length > 0 %}
+ src: '{{ obj.m3u8_uri }}',
+ type: 'application/x-mpegURL'
+ {% else %}
+ src: '{{ obj.uri }}',
+ type: 'video/mp4'
+ {% endif %}
+ },
+ ],
+ textTracks: [
+ {
+ kind: 'metadata',
+ src: '{{ obj.thumbnails_uri }}',
+ label: 'thumbnails',
+ default: true,
+ },
+
+ // Add chapters.vtt if available
+ {% if obj.chapters_uri %}
+ {
+ kind: 'chapters',
+ src: '{{ obj.chapters_uri }}',
+ label: 'Chapters',
+ },
+ {% endif %}
+
+ // Add subtitles
{% if not embed_config.subtitlesOff %}
- textTracks: [
- {
- kind: 'metadata',
- src: '{{ obj.thumbnails_uri }}',
- label: 'thumbnails',
- default: true,
- },
- {% for uri, lang in obj.subtitles %}
- {
- kind: 'subtitles',
- src: '{{ uri }}',
- label: '{{ lang }}',
- srclang: '{{ lang }}',
- {% if embed_config.subtitles and embed_config.subtitles == lang %}
- default: true,
- {% endif %}
- },
- {% endfor %}
- ],
+ {% for uri, lang in obj.subtitles %}
+ {
+ kind: 'subtitles',
+ src: '{{ uri }}',
+ label: '{{ lang }}',
+ srclang: '{{ lang }}',
+ {% if embed_config.subtitles and embed_config.subtitles == lang %}
+ default: true,
+ {% endif %}
+ },
+ {% endfor %}
{% endif %}
- poster: '{{ obj.poster_uri }}',
- {% if obj.vr %}
+ ],
+ poster: '{{ obj.poster_uri }}',
+ {% if obj.vr %}
vr: {
360: true,
},
@@ -162,6 +189,14 @@
}
})(player);
{% endif %}
+ (function() {
+ const params = new URLSearchParams(window.location.search);
+ const videoDuration = durationToSeconds({{ (record.duration if record and record.duration else "") | tojson }});
+ const startTime = parseInt(params.get('t'), 10);
+ if (!isNaN(startTime) && startTime >= 0 && startTime < videoDuration) {
+ player.currentTime = startTime;
+ }
+ })();
{% endif %}
{%- endmacro %}
diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json
index d2e230a8c..c4c589fcf 100644
--- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json
+++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json
@@ -234,6 +234,9 @@
"recid": {
"type": "double"
},
+ "legacy_recid": {
+ "type": "double"
+ },
"doi": {
"type": "text"
},
@@ -331,6 +334,133 @@
},
"publication_date": {
"type": "text"
+ },
+ "alternate_identifiers": {
+ "properties": {
+ "scheme": {
+ "type": "keyword"
+ },
+ "value": {
+ "type": "keyword"
+ }
+ }
+ },
+ "_curation": {
+ "type": "object",
+ "properties": {
+ "legacy_report_number": {
+ "type": "keyword"
+ },
+ "department": {
+ "type": "keyword"
+ },
+ "volumes": {
+ "type": "text"
+ },
+ "physical_location": {
+ "type": "text"
+ },
+ "physical_medium": {
+ "type": "text"
+ },
+ "internal_note": {
+ "type": "text"
+ },
+ "digitized": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "text"
+ },
+ "format": {
+ "type": "text"
+ },
+ "link_text": {
+ "type": "text"
+ },
+ "public_note": {
+ "type": "text"
+ },
+ "nonpublic_note": {
+ "type": "text"
+ },
+ "md5_checksum": {
+ "type": "text"
+ },
+ "source": {
+ "type": "text"
+ }
+ }
+ },
+ "legacy_marc_fields": {
+ "type": "object",
+ "properties": {
+ "964": {
+ "type": "text"
+ },
+ "336": {
+ "type": "text"
+ },
+ "583": {
+ "type": "text"
+ },
+ "306": {
+ "type": "text"
+ }
+ }
+ }
+ }
+ },
+ "additional_titles": {
+ "type": "object",
+ "properties": {
+ "title": {
+ "type": "text"
+ },
+ "type": {
+ "type": "keyword"
+ },
+ "lang": {
+ "type": "keyword"
+ }
+ }
+ },
+ "additional_descriptions": {
+ "type": "object",
+ "properties": {
+ "description": {
+ "type": "text"
+ },
+ "type": {
+ "type": "keyword"
+ },
+ "lang": {
+ "type": "keyword"
+ }
+ }
+ },
+ "related_identifiers": {
+ "type": "object",
+ "properties": {
+ "identifier": {
+ "type": "text"
+ },
+ "scheme": {
+ "type": "keyword"
+ },
+ "relation_type": {
+ "type": "keyword"
+ },
+ "resource_type": {
+ "type": "keyword"
+ }
+ }
+ },
+ "collections": {
+ "type": "keyword"
+ },
+ "additional_languages": {
+ "type": "text"
}
}
}
diff --git a/cds/modules/records/permissions.py b/cds/modules/records/permissions.py
index 295b7a0cb..9a9b37137 100644
--- a/cds/modules/records/permissions.py
+++ b/cds/modules/records/permissions.py
@@ -25,7 +25,7 @@
from flask import current_app
from flask_security import current_user
-from invenio_access import Permission
+from invenio_access import Permission, action_factory
from invenio_files_rest.models import Bucket, MultipartObject, ObjectVersion
from invenio_records_files.api import FileObject
from invenio_records_files.models import RecordsBuckets
@@ -35,6 +35,8 @@
from .utils import get_user_provides, is_deposit, is_record, lowercase_value
+upload_access_action = action_factory("videos-upload-access")
+
def files_permission_factory(obj, action=None):
"""Permission for files are always based on the type of bucket.
@@ -228,7 +230,7 @@ def can(self):
def create(cls, record, action, user=None):
"""Create a record permission."""
if action in cls.create_actions:
- return cls(record, allow, user)
+ return cls(record, has_upload_permission, user)
elif action in cls.read_actions:
return cls(record, has_read_record_permission, user)
elif action in cls.read_eos_path_actions:
@@ -334,6 +336,9 @@ def has_update_permission(user, record):
"""Check if user has update access to the record."""
user_id = int(user.get_id()) if user.is_authenticated else None
+ if not has_upload_permission():
+ return False
+
# Allow owners
deposit_creator = record.get("_deposit", {}).get("created_by", -1)
if user_id == deposit_creator:
@@ -359,3 +364,8 @@ def has_admin_permission(user=None, record=None):
"""
# Allow administrators
return Permission(action_admin_access).can()
+
+
+def has_upload_permission(*args, **kwargs):
+ """Return permission to allow only cern users."""
+ return Permission(upload_access_action).can()
\ No newline at end of file
diff --git a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json
index 756fc1b09..107b09231 100644
--- a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json
+++ b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json
@@ -139,6 +139,7 @@
"Camera Operator",
"Comments by",
"Co-Producer",
+ "ContactPerson",
"Creator",
"Credits",
"Director",
@@ -147,7 +148,9 @@
"Narrator",
"Photography",
"Producer",
+ "RelatedPerson",
"Reporter",
+ "ResearchGroup",
"Screenwriter",
"Speaker",
"Subtitles by",
diff --git a/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json
new file mode 100644
index 000000000..e1c217180
--- /dev/null
+++ b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json
@@ -0,0 +1,57 @@
+{
+ "languages": {
+ "enum": [
+ "ar",
+ "ast",
+ "bg",
+ "ca",
+ "ch",
+ "cs",
+ "cy",
+ "da",
+ "de",
+ "el",
+ "en",
+ "en-fr",
+ "es",
+ "et",
+ "eu",
+ "fi",
+ "fr",
+ "ga",
+ "gd",
+ "gl",
+ "he",
+ "hi",
+ "hr",
+ "hu",
+ "it",
+ "lt",
+ "ja",
+ "ka",
+ "ko",
+ "kw",
+ "nb",
+ "nl",
+ "nn",
+ "no",
+ "pl",
+ "pt",
+ "rm",
+ "ro",
+ "ru",
+ "se",
+ "silent",
+ "sk",
+ "sl",
+ "sr",
+ "sv",
+ "tr",
+ "uk",
+ "ur",
+ "zh",
+ "zh_CN",
+ "zh_TW"
+ ]
+ }
+}
diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json
index 5ace8d1d3..84ecfcde0 100644
--- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json
+++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json
@@ -62,6 +62,7 @@
"Camera Operator",
"Comments by",
"Co-Producer",
+ "ContactPerson",
"Creator",
"Credits",
"Director",
@@ -70,7 +71,9 @@
"Narrator",
"Photography",
"Producer",
+ "RelatedPerson",
"Reporter",
+ "ResearchGroup",
"Screenwriter",
"Speaker",
"Subtitles by",
@@ -201,6 +204,24 @@
"description": "List of identifiers on external systems.",
"title": "External identifiers"
},
+    "alternate_identifiers": {
+      "items": {
+        "properties": {
+          "scheme": {
+            "title": "Scheme of the identifier (Vocabulary)",
+            "type": "string",
+            "enum": ["URL", "DOI", "CDS"]
+          },
+          "value": {
+            "title": "Value of the identifier",
+            "type": "string"
+          }
+        },
+        "required": ["value", "scheme"],
+        "type": "object"
+      },
+      "title": "List of alternate identifiers of the record",
+      "type": "array"
+    },
"subject": {
"additionalProperties": false,
"description": "Subject.",
@@ -400,59 +421,7 @@
},
"language": {
"default": "en",
- "enum": [
- "ar",
- "ast",
- "bg",
- "ca",
- "ch",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "en-fr",
- "es",
- "et",
- "eu",
- "fi",
- "fr",
- "ga",
- "gd",
- "gl",
- "he",
- "hi",
- "hr",
- "hu",
- "it",
- "lt",
- "ja",
- "ka",
- "ko",
- "kw",
- "nb",
- "nl",
- "nn",
- "no",
- "pl",
- "pt",
- "rm",
- "ro",
- "ru",
- "se",
- "silent",
- "sk",
- "sl",
- "sr",
- "sv",
- "tr",
- "uk",
- "ur",
- "zh",
- "zh_CN",
- "zh_TW"
- ],
+ "$ref": "definitions-v1.0.0.json#/languages",
"type": "string",
"description": "A language of the resource."
},
@@ -538,59 +507,7 @@
"language": {
"description": "A language of the resource.",
"default": "en",
- "enum": [
- "ar",
- "ast",
- "bg",
- "ca",
- "ch",
- "cs",
- "cy",
- "da",
- "de",
- "el",
- "en",
- "en-fr",
- "es",
- "et",
- "eu",
- "fi",
- "fr",
- "ga",
- "gd",
- "gl",
- "he",
- "hi",
- "hr",
- "hu",
- "it",
- "lt",
- "ja",
- "ka",
- "ko",
- "kw",
- "nb",
- "nl",
- "nn",
- "no",
- "pl",
- "pt",
- "rm",
- "ro",
- "ru",
- "se",
- "silent",
- "sk",
- "sl",
- "sr",
- "sv",
- "tr",
- "uk",
- "ur",
- "zh",
- "zh_CN",
- "zh_TW"
- ],
+ "$ref": "definitions-v1.0.0.json#/languages",
"type": "string"
},
"accelerator_experiment": {
@@ -631,11 +548,218 @@
"type": "number",
"description": "Invenio record identifier (integer)."
},
+ "legacy_recid": {
+ "type": "number",
+ "description": "Legacy record identifier (integer). Kept for auditing reasons."
+ },
"original_source": {
"type": "string"
},
"_project_id": {
"type": "string"
+ },
+ "_curation": {
+ "properties": {
+ "legacy_report_number": {
+ "title": "Legacy record report number.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "department": {
+ "title": "CERN department.",
+ "type": "string"
+ },
+ "volumes": {
+ "title": "Volume list for this record.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "physical_location": {
+ "title": "Tag 852 physical location.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "physical_medium": {
+ "title": "Tag 340 physical medium.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "internal_note": {
+ "title": "Tag 595 internal note.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "digitized": {
+ "title": "Digitized metadata.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "properties": {
+ "url": {
+ "type": "string"
+ },
+ "format": {
+ "type": "string"
+ },
+ "link_text": {
+ "type": "string"
+ },
+ "public_note": {
+ "type": "string"
+ },
+ "nonpublic_note": {
+ "type": "string"
+ },
+ "md5_checksum": {
+ "type": "string"
+ },
+ "source": {
+ "type": "string"
+ }
+ }
+ }
+ },
+ "legacy_marc_fields": {
+ "type": "object",
+ "properties": {
+ "964": {
+ "title": "Tag 964.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "336": {
+ "title": "Tag 336.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "583": {
+ "title": "Tag 583.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ },
+ "306": {
+ "title": "Tag 306.",
+ "type": "array",
+ "items": {
+ "type": "string"
+ }
+ }
+ }
+ }
+ },
+      "title": "Fields that need curation.",
+ "description": "This section contains MARC21 metadata fields that could not be mapped during weblectures migration.",
+ "type": "object"
+ },
+ "additional_titles": {
+ "description": "Additional record titles.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "title": {
+ "description": "Additional title of the record.",
+ "type": "string"
+ },
+ "type": {
+ "type": "string",
+ "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"]
+ },
+ "lang": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
+ }
+ }
+ },
+ "additional_descriptions": {
+ "description": "Additional descriptions for the record.",
+ "type": "array",
+ "items": {
+ "type": "object",
+ "additionalProperties": false,
+ "properties": {
+ "description": {
+ "type": "string",
+ "description": "Descriptive content."
+ },
+ "type": {
+ "type": "string",
+ "enum": [
+ "Abstract",
+ "Methods",
+ "Other",
+ "SeriesInformation",
+ "TableOfContents",
+ "TechnicalInfo"
+ ]
+ },
+ "lang": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
+ }
+ }
+ },
+ "related_identifiers": {
+ "type": "array",
+ "items": {
+ "type": "object",
+ "required": ["identifier", "scheme", "relation_type"],
+ "additionalProperties": false,
+ "properties": {
+ "identifier": {
+ "type": "string",
+ "description": "The actual identifier (e.g., URL or DOI)."
+ },
+ "scheme": {
+ "type": "string",
+ "enum": ["URL", "DOI", "CDS", "Indico"],
+ "description": "The scheme describing the identifier type."
+ },
+ "relation_type": {
+ "type": "string",
+ "enum": ["IsPartOf", "IsVariantFormOf"],
+ "description": "Describes the relationship with the current record."
+ },
+ "resource_type": {
+ "type": "string",
+ "enum": ["Event", "ConferencePaper", "Report", "Book"],
+ "description": "Type of the related resource."
+ }
+ }
+ }
+ },
+ "collections": {
+ "items": {
+ "type": "string"
+ },
+ "type": "array"
+ },
+ "additional_languages": {
+ "description": "Additional languages for the record.",
+ "type": "array",
+ "items": {
+ "type": "string",
+ "$ref": "definitions-v1.0.0.json#/languages"
+ }
}
},
"title": "CDS Base Record Schema v1.0.0"
diff --git a/cds/modules/records/serializers/json.py b/cds/modules/records/serializers/json.py
index 9524d15f9..0a05d85de 100644
--- a/cds/modules/records/serializers/json.py
+++ b/cds/modules/records/serializers/json.py
@@ -31,7 +31,7 @@
has_read_record_eos_path_permission,
has_read_record_permission,
)
-from ..utils import HTMLTagRemover, remove_html_tags
+from ..utils import HTMLTagRemover, parse_video_chapters, remove_html_tags
class CDSJSONSerializer(JSONSerializer):
@@ -81,6 +81,12 @@ def preprocess_record(self, pid, record, links_factory=None):
except KeyError:
# ignore error if keys are missing in the metadata
pass
+
+ description = metadata.get('description', '')
+ if description:
+ metadata['chapters'] = parse_video_chapters(description)
+ else:
+ metadata['chapters'] = []
return result
diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py
index bfae055f4..e0e03634f 100644
--- a/cds/modules/records/serializers/schemas/common.py
+++ b/cds/modules/records/serializers/schemas/common.py
@@ -155,3 +155,63 @@ class ExternalSystemIdentifiersField(StrictKeysSchema):
value = fields.Str()
schema = fields.Str()
+
+
+class AlternateIdentifiersSchema(StrictKeysSchema):
+    """Schema for one entry of the ``alternate_identifiers`` field.
+
+    Both ``value`` and ``scheme`` are required strings.
+    """
+
+    value = fields.Str(required=True)
+    scheme = fields.Str(required=True)
+
+
+class LegacyMARCFieldsSchema(Schema):
+    """Schema for legacy MARC tags kept as lists of strings.
+
+    Each attribute is bound to its numeric tag through ``data_key``.
+    """
+
+    tag_964 = fields.List(fields.Str(), data_key="964")
+    tag_336 = fields.List(fields.Str(), data_key="336")
+    tag_583 = fields.List(fields.Str(), data_key="583")
+    tag_306 = fields.List(fields.Str(), data_key="306")
+
+
+class DigitizedMetadataSchema(Schema):
+    """Schema for one digitized-metadata entry; every field is an optional string."""
+
+    url = fields.Str()
+    format = fields.Str()
+    link_text = fields.Str()
+    public_note = fields.Str()
+    nonpublic_note = fields.Str()
+    md5_checksum = fields.Str()
+    source = fields.Str()
+
+
+class CurationSchema(StrictKeysSchema):
+ """Curation schema."""
+
+ legacy_report_number = fields.List(fields.Str())
+ department = fields.Str()
+ volumes = fields.List(fields.Str())
+ physical_location = fields.List(fields.Str())
+ physical_medium = fields.List(fields.Str())
+ internal_note = fields.List(fields.Str())
+ legacy_marc_fields = fields.Nested(LegacyMARCFieldsSchema)
+ digitized = fields.Nested(DigitizedMetadataSchema)
+
+
+class AdditionalTitlesSchema(Schema):
+    """Additional titles schema.
+
+    One entry holds the ``title`` text, its ``type`` and its ``lang``; all
+    three are optional strings.
+    """
+
+    title = fields.Str()
+    type = fields.Str()
+    lang = fields.Str()
+
+
+class AdditionalDescriptionsSchema(Schema):
+    """Additional descriptions schema.
+
+    One entry holds the ``description`` text, its ``type`` and its ``lang``;
+    all three are optional strings.
+    """
+
+    description = fields.Str()
+    type = fields.Str()
+    lang = fields.Str()
+
+
+class RelatedIdentifiersSchema(Schema):
+    """Schema for one related identifier.
+
+    ``identifier``, ``scheme`` and ``relation_type`` are required strings;
+    ``resource_type`` is optional.
+    """
+
+    identifier = fields.Str(required=True)
+    scheme = fields.Str(required=True)
+    relation_type = fields.Str(required=True)
+    resource_type = fields.Str()
\ No newline at end of file
diff --git a/cds/modules/records/serializers/schemas/datacite.py b/cds/modules/records/serializers/schemas/datacite.py
index 3c991b249..44776fe03 100644
--- a/cds/modules/records/serializers/schemas/datacite.py
+++ b/cds/modules/records/serializers/schemas/datacite.py
@@ -53,6 +53,7 @@ class DataCiteSchemaV1(Schema):
"""DataCite schema v1."""
creators = fields.Method("get_creators")
+ contributors = fields.Method("get_contributors")
dates = fields.Method("get_dates")
descriptions = fields.Method("get_descriptions")
identifier = fields.Nested(IdentifierSchema, attribute="metadata.doi")
@@ -101,23 +102,27 @@ def get_creators(self, obj):
"""Get creators."""
items = []
for item in obj["metadata"].get("contributors", []):
- items.append(
- {
- "creatorName": item.get("name", ""),
- }
+ if item.get("role", "") != "ResearchGroup":
+ items.append(
+ {
+ "creatorName": item.get("name", ""),
+ }
)
return items
- # def get_contributors(self, obj):
- # """Get contributors."""
- # items = []
- # for item in obj['metadata'].get('contributors', []):
- # items.append({
- # 'contributorType': item.get('role', ''),
- # 'contributorName': item.get('name', ''),
- # # FIXME nameIdentifier and nameIdentifierScheme, ... ?
- # })
- # return items
+ def get_contributors(self, obj):
+     """Return DataCite contributor dicts for ResearchGroup entries only."""
+     # Missing "contributors" yields []; a missing "role" never matches.
+     # FIXME nameIdentifier and nameIdentifierScheme, ... ?
+     research_groups = []
+     for entry in obj["metadata"].get("contributors", []):
+         role = entry.get("role", "")
+         if role != "ResearchGroup":
+             continue
+         research_groups.append(
+             {"contributorType": role, "contributorName": entry.get("name", "")}
+         )
+     return research_groups
def get_publication_year(self, obj):
"""Get publication year."""
diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py
index c83e2e83a..66e9f763f 100644
--- a/cds/modules/records/serializers/schemas/video.py
+++ b/cds/modules/records/serializers/schemas/video.py
@@ -19,25 +19,31 @@
"""Video JSON schema."""
from invenio_jsonschemas import current_jsonschemas
-from marshmallow import Schema, fields, pre_load, post_load
+from marshmallow import Schema, fields, pre_load, post_load, post_dump
from ....deposit.api import Video
from ..fields.datetime import DateString
from .common import (
AccessSchema,
+ AdditionalTitlesSchema,
+ AdditionalDescriptionsSchema,
+ AlternateIdentifiersSchema,
BucketSchema,
ContributorSchema,
+ CurationSchema,
DepositSchema,
ExternalSystemIdentifiersField,
KeywordsSchema,
LicenseSchema,
OaiSchema,
+ RelatedIdentifiersSchema,
RelatedLinksSchema,
StrictKeysSchema,
TitleSchema,
TranslationsSchema,
)
from .doi import DOI
+from ...utils import parse_video_chapters
class _CDSSSchema(Schema):
@@ -142,6 +148,7 @@ class VideoSchema(StrictKeysSchema):
note = fields.Str()
publication_date = fields.Str()
recid = fields.Number()
+ legacy_recid = fields.Number()  # numeric, declared next to ``recid``
related_links = fields.Nested(RelatedLinksSchema, many=True)
report_number = fields.List(fields.Str, many=True)
schema = fields.Str(attribute="$schema", data_key="$schema")
@@ -149,7 +156,18 @@ class VideoSchema(StrictKeysSchema):
translations = fields.Nested(TranslationsSchema, many=True)
type = fields.Str()
vr = fields.Boolean()
-
+ # Curation block plus the additional/alternate/related metadata lists.
+ _curation = fields.Nested(CurationSchema)
+ additional_titles = fields.List(fields.Nested(AdditionalTitlesSchema))
+ additional_descriptions = fields.List(fields.Nested(AdditionalDescriptionsSchema))
+ alternate_identifiers = fields.Nested(AlternateIdentifiersSchema, many=True)
+ related_identifiers = fields.Nested(RelatedIdentifiersSchema, many=True)
+ # ``many`` is a ``fields.Nested`` option, not a ``fields.List`` one: passing
+ # it to ``List`` only stores it as (deprecated) field metadata, so the
+ # plain string lists below are declared without it.
+ collections = fields.List(fields.Str())
+ additional_languages = fields.List(fields.Str())
+
# Preservation fields
location = fields.Str()
original_source = fields.Str()
@@ -160,3 +178,4 @@ def post_load(self, data, **kwargs):
"""Post load."""
data["$schema"] = current_jsonschemas.path_to_url(Video._schema)
return data
+
diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html
index c253f1a43..e7fbc15de 100644
--- a/cds/modules/records/static/templates/cds_records/video/detail.html
+++ b/cds/modules/records/static/templates/cds_records/video/detail.html
@@ -4,14 +4,119 @@
-
-
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ -
+
+
+
+
![Chapter {{ chapter.timestamp }}]()
+
+
+
+
+
+
+
+
+ {{ cleanHtmlFromTitle(chapter.title) }}
+
+
+ {{ chapter.timestamp }}
+
+
+
+
+
+
+
+
+
+
@@ -64,7 +169,7 @@
-
+
{{translation.language | isoToLanguage}}
{{translation.title.title}}
@@ -130,7 +235,7 @@
{{translation.title.title}}
-
+
@@ -181,6 +286,7 @@
{{translation.title.title}}
+
@@ -199,9 +305,81 @@ {{translation.title.title}}
+
+
+
+
+
+
Chapters
+
+
+
+
+
+
+
+
+
+
![Chapter {{ chapter.timestamp }}]()
+
+
+
+
+
+ {{ chapter.timestamp }}
+
+
+
+
+
+ {{ cleanHtmlFromTitle(chapter.title) }}
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Transcriptions
+ Follow along or search within the transcript.
+
+
+
+
+
+
+
+
+
@@ -212,10 +390,9 @@
-
-
+
diff --git a/cds/modules/records/static/templates/cds_records/video/downloads.html b/cds/modules/records/static/templates/cds_records/video/downloads.html
index c5fc4888b..26ddb78e1 100644
--- a/cds/modules/records/static/templates/cds_records/video/downloads.html
+++ b/cds/modules/records/static/templates/cds_records/video/downloads.html
@@ -86,27 +86,27 @@
-
-