diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index 72dad9104..96e97e038 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -9,9 +9,13 @@ name: CI on: push: - branches: main + branches: + - main pull_request: - branches: main + branches: + - main + - additional-files + schedule: # * is a special character in YAML so you have to quote this string - cron: "0 4 * * 6" diff --git a/Bruno Collection - CDS Videos Publish Video.json b/Bruno Collection - CDS Videos Publish Video.json index fb6898f95..a1aed1578 100644 --- a/Bruno Collection - CDS Videos Publish Video.json +++ b/Bruno Collection - CDS Videos Publish Video.json @@ -4,38 +4,8 @@ "items": [ { "type": "http", - "name": "Step 6: Get Project to Check the Flow Status", + "name": "Optional: Update the Access of the Video", "seq": 7, - "request": { - "url": "{{baseURL}}/api/deposits/project/{{project_id}}", - "method": "GET", - "headers": [ - { - "name": "content-type", - "value": "application/vnd.project.partial+json", - "enabled": true - } - ], - "params": [], - "body": { - "mode": "json", - "json": "", - "formUrlEncoded": [], - "multipartForm": [] - }, - "script": {}, - "vars": {}, - "assertions": [], - "tests": "", - "auth": { - "mode": "inherit" - } - } - }, - { - "type": "http", - "name": "Optional: Update the Access of the Video", - "seq": 6, "request": { "url": "{{baseURL}}/api/deposits/video/{{video_id}}", "method": "PUT", @@ -79,7 +49,7 @@ "params": [], "body": { "mode": "json", - "json": "{\n \"$schema\": \"https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json\",\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n \"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}", + "json": "{\n \"_access\": {\n \"update\": [\n \"admin@test.ch\",\n \"your-egroup@cern.ch\"\n ],\n 
\"read\": [ // If you want to restrict the project, add access read\n \"your-egroup@cern.ch\"\n ]\n },\n // Add category and type\n \"category\": \"ATLAS\",\n \"type\": \"VIDEO\"\n}", "formUrlEncoded": [], "multipartForm": [] }, @@ -111,7 +81,7 @@ "params": [], "body": { "mode": "json", - "json": "{\n \"$schema\":\"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json\",\n \"_project_id\":\"{{project_id}}\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}", + "json": "{\n \"_project_id\":\"{{project_id}}\",\n \"title\":\n {\n \"title\":\"your_title\"\n },\n \"_access\": {\n \"read\": [\n \"your-egroup@cern.ch\"\n ]\n },\n \"vr\": false,\n \"featured\": false,\n \"language\": \"en\",\n \"contributors\": [\n {\n \"name\": \"Surname, Name\",\n \"ids\": [\n {\n \"value\": \"cern id\",\n \"source\": \"cern\"\n }\n ],\n \"email\": \"test@cern.ch\",\n \"role\": \"Co-Producer\"\n }\n ],\n \"description\": \"Description\",\n \"date\": \"2024-11-12\",\n \"keywords\":[\n {\n \"name\": \"keyword\",\n \"value\": {\n \"name\": \"keyword\"\n }\n },\n {\n \"name\": \"keyword2\",\n \"value\": {\n \"name\": \"keyword2\"\n }\n }\n ],\n \"related_links\":[\n {\n \"name\": \"related link\",\n \"url\": \"https://relatedlink\"\n }\n ]\n}", "formUrlEncoded": [], "multipartForm": 
[] }, @@ -184,7 +154,9 @@ "formUrlEncoded": [], "multipartForm": [] }, - "script": {}, + "script": { + "res": "let data = res.body;\nbru.setEnvVar(\"main_video_key\", data.key);" + }, "vars": {}, "assertions": [], "tests": "", @@ -195,12 +167,18 @@ }, { "type": "http", - "name": "Step 5: Upload additional file", + "name": "Optional: Upload additional file", "seq": 5, "request": { "url": "{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}", "method": "PUT", - "headers": [], + "headers": [ + { + "name": "X-Invenio-File-Tags", + "value": "context_type=additional_file", + "enabled": true + } + ], "params": [], "body": { "mode": "json", @@ -221,8 +199,38 @@ }, { "type": "http", - "name": "Step 7: Publish", + "name": "Step 5: Get Video to Check the Flow Status", "seq": 8, + "request": { + "url": "{{baseURL}}/api/deposits/video/{{video_id}}", + "method": "GET", + "headers": [ + { + "name": "content-type", + "value": "application/vnd.project.partial+json", + "enabled": true + } + ], + "params": [], + "body": { + "mode": "json", + "json": "", + "formUrlEncoded": [], + "multipartForm": [] + }, + "script": {}, + "vars": {}, + "assertions": [], + "tests": "", + "auth": { + "mode": "inherit" + } + } + }, + { + "type": "http", + "name": "Step 6: Publish", + "seq": 9, "request": { "url": "{{baseURL}}/api/deposits/video/{{video_id}}/actions/publish", "method": "POST", @@ -248,9 +256,40 @@ "mode": "inherit" } } + }, + { + "type": "http", + "name": "Optional: Replace Main File", + "seq": 6, + "request": { + "url": "{{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}", + "method": "PUT", + "headers": [ + { + "name": "X-Invenio-File-Tags", + "value": "times_replaced=1", + "enabled": true + } + ], + "params": [], + "body": { + "mode": "json", + "json": "", + "formUrlEncoded": [], + "multipartForm": [] + }, + "script": { + "req": "const fs = require('fs');\nconst path = require('path');\n\n// File details\nconst filename = \"video_name.mp4\"; // CHANGE HERE\nconst filePath = 
\"video_file_path\"; // CHANGE HERE\n\n// Read the file as raw binary data\nconst fileContent = fs.readFileSync(filePath);\n\n// Set request headers\nreq.setHeader(\"Accept\", \"application/json, text/plain, */*\"); \nreq.setHeader(\"Accept-Encoding\", \"gzip, deflate, br, zstd\"); \nreq.setHeader(\"Content-Length\", fileContent.length);\n\n// Attach the file content as the request body\nreq.setBody(fileContent);\n" + }, + "vars": {}, + "assertions": [], + "tests": "", + "auth": { + "mode": "inherit" + } + } } ], - "activeEnvironmentUid": "O01m8KLYsgrkGRjOSv443", "environments": [ { "variables": [ @@ -284,9 +323,6 @@ "allow": true } }, - "ignore": [ - "node_modules", - ".git" - ] + "ignore": ["node_modules", ".git"] } -} \ No newline at end of file +} diff --git a/README.rst b/README.rst index b27d6d768..6fb1036a2 100644 --- a/README.rst +++ b/README.rst @@ -33,10 +33,13 @@ Table of Contents - `Step 2: Create a Video <#step-2-create-a-video>`_ - `Step 3: Upload the Video <#step-3-upload-the-video>`_ - `Step 4: Create a Flow <#step-4-create-a-flow>`_ - - `Step 5: (Optional) Upload Additional File <#step-5-optional-upload-additional-file>`_ + - `Optional: Upload Additional File <#optional-upload-additional-file>`_ - `Optional: Update the Access of the Video <#optional-update-the-access-of-the-video>`_ - - `Step 6: Get Project to Check the Flow Status <#step-6-get-project-to-check-the-flow-status>`_ - - `Step 7: Publish Video <#step-7-publish-video>`_ + - `Step 5: Get Video to Check the Flow Status <#step-5-get-video-to-check-the-flow-status>`_ + - `Step 6: Publish Video <#step-6-publish-video>`_ +- `Replace the Main Video File through REST API <#replace-the-main-video-file-through-rest-api>`_ + - `General Flow <#general-flow>`_ + - `Alternative: Without Doing the Get Request <#alternative-without-doing-the-get-request>`_ Prerequisites @@ -228,6 +231,7 @@ If you'd like to use the pre-configured REST API collection in Bruno, ensure you - Download this `Bruno collection 
<./Bruno%20Collection%20-%20CDS%20Videos%20Publish%20Video.json>`_. - Open Bruno and import downloaded collection. + - Switch to **Developer Mode**. - Create an environment for the collection. - Configure the environment by adding a variable named ``baseURL``. Set its value to your API base URL (e.g., ``http://localhost:5000``). @@ -258,11 +262,6 @@ Step 1: Create a Project - **Location** - **Description** - **Required/Optional** - * - **$schema** - - string - - body - - Schema URL for the project creation. - - Required * - **category** - string - body @@ -307,7 +306,6 @@ To restrict the project, add ``_access/read``: .. code-block:: json { - "$schema": "https://localhost:5000/schemas/deposits/records/videos/project/project-v1.0.0.json", "_access": { "update": [ "admin@test.ch", @@ -355,7 +353,7 @@ To restrict the project, add ``_access/read``: **Response:** -Created project JSON. +Created project JSON. Save ``response.body.project_id`` as ``_project_id`` for later use. Step 2: Create a Video @@ -379,11 +377,6 @@ Step 2: Create a Video - **Location** - **Description** - **Required/Optional** - * - **$schema** - - string - - body - - Schema URL for video creation. - - Required * - **_project_id** - string - body @@ -423,7 +416,7 @@ Step 2: Create a Video - string - body - Language of the video. - - Optional + - Required * - **featured** - boolean - body @@ -447,7 +440,6 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the .. code-block:: json { - "$schema":"https://localhost:5000/schemas/deposits/records/videos/video/video-v1.0.0.json", "_project_id":"{{project_id}}", "title": { @@ -495,12 +487,13 @@ To restrict the video, add ``_access/read``. The ``_access/update`` will be the "name": "related link", "url": "https://relatedlink" } - ] + ], + "language": "en" } **Response:** -Created video JSON. +Created video JSON. Save ``response.body.id`` as ``video_id`` and ``response.body.metadata._buckets.deposit`` as ``bucket_id`` for later use. 
Step 3: Upload the Video @@ -542,7 +535,7 @@ Step 3: Upload the Video **Response:** -Uploaded video JSON. +Uploaded video JSON. Save ``response.body.version_id`` as ``main_file_version_id`` and ``response.body.key`` as ``video_key`` for later use. Step 4: Create a Flow @@ -595,16 +588,20 @@ Step 4: Create a Flow **Response:** -Created flow JSON. +Created flow JSON. If you want to replace the main video file later, save ``response.body.key`` as ``main_video_key``. -Step 5: (Optional) Upload Additional File +Optional: Upload Additional File ------------------------------------------ **Request:** ``PUT`` ``{{baseURL}}/api/files/{{bucket_id}}/{{additional_file}}`` +**Headers:** + +- ``X-Invenio-File-Tags: context_type=additional_file`` + **Parameters:** .. list-table:: @@ -629,10 +626,6 @@ Step 5: (Optional) Upload Additional File - To include the file in the body, modify the `pre-request script` in Bruno. -**Response:** - -Uploaded additional file JSON. - Optional: Update the Access of the Video ---------------------------------------- @@ -682,12 +675,12 @@ To restrict the video, add ``_access/read``. If you want to change the access/up Updated video JSON. -Step 6: Get Project to Check the Flow Status +Step 5: Get Video to Check the Flow Status -------------------------------------------- **Request:** -``GET`` ``{{baseURL}}/api/deposits/project/{{project_id}}`` +``GET`` ``{{baseURL}}/api/deposits/video/{{video_id}}`` **Headers:** @@ -702,19 +695,18 @@ Step 6: Get Project to Check the Flow Status - **Type** - **Location** - **Description** - * - **project_id** + * - **video_id** - string - path - - ID of the project. + - ID of the video. **Response:** -Updated project JSON with flow status as ``state``: +Updated video JSON with flow status. You can find the flow status in ``response.body.metadata._cds.state``: .. 
code-block:: json { - "id": "b320568fc1264dda90a8f459be42892e", "_cds": { "state": { "file_transcode": "STARTED", @@ -725,7 +717,7 @@ Updated project JSON with flow status as ``state``: } -Step 7: Publish Video +Step 6: Publish Video ---------------------- Before publishing the video, ensure that the workflow is complete. @@ -757,3 +749,180 @@ Before publishing the video, ensure that the workflow is complete. Published video deposit JSON. + +Replace the Main Video File through REST API +============================================ + +General Flow +------------ + +1. Get the video (see `Step 5 <#step-5-get-video-to-check-the-flow-status>`_) and find the master file key from the response. + + **Request:** + + ``GET {{baseURL}}/api/deposits/video/{{video_id}}`` + + **Headers:** + + - ``content-type: application/vnd.project.partial+json`` + + **Parameters:** + + .. list-table:: + :header-rows: 1 + + * - **Name** + - **Type** + - **Location** + - **Description** + * - **video_id** + - string + - path + - ID of the video. + + **Response:** + + Video JSON. You can find the main file inside ``response.body.metadata._files``. + + .. code-block:: javascript + + let files = data.metadata?._files || []; + // Find the master file + let masterFile = files.find(f => f.context_type === "master"); + video_key = masterFile.key; + + +2. Upload the new video with the same master key and same ``bucket_id`` (see `Step 3 <#step-3-upload-the-video>`_) + + **Upload Request** + + ``PUT {{baseURL}}/api/files/{{bucket_id}}/{{main_video_key}}`` + + **Headers:** + + - ``X-Invenio-File-Tags: times_replaced=number_of_times_replaced`` + + **Parameters:** + + .. list-table:: + :header-rows: 1 + + * - **Name** + - **Type** + - **Location** + - **Description** + * - **bucket_id** + - string + - path + - ID of the bucket to upload the file. + * - **main_video_key** + - string + - path + - Key of the previous main file. + * - **file** + - file + - body + - The file to be uploaded. 
+ + **Response:** + + Uploaded file JSON. Save version_id and key for later use: + + - ``response.body.version_id`` → ``main_file_version_id`` + - ``response.body.key`` → ``video_key`` + + + +3. Start the flow with your new ``video_key`` and ``main_file_version_id`` but keep the same ``bucket_id`` and ``deposit_id`` (see `Step 4 <#step-4-create-a-flow>`_) + + **Request:** + + ``POST /api/flows/`` + + **Headers:** + + - ``content-type: application/vnd.project.partial+json`` + + **Parameters:** + + .. list-table:: + :header-rows: 1 + + * - **Name** + - **Type** + - **Location** + - **Description** + * - **version_id** + - string + - body + - Version ID from the uploaded video response. + * - **key** + - string + - body + - Video key from the uploaded video response. + * - **bucket_id** + - string + - body + - Bucket ID from the Create Video response. + * - **deposit_id** + - string + - body + - Deposit ID from the Create Video response. + + **Body:** + + .. code-block:: json + + { + "version_id": "{{main_file_version_id}}", + "key": "{{video_key}}", + "bucket_id": "{{bucket_id}}", + "deposit_id": "{{video_id}}" + } + + +Alternative: Without Doing the Get Request +------------------------------------------ + +If you want to integrate this process into your workflow **without calling the Get Video request**, +you must be careful about which **video key** you are using, since it changes during different stages. + +**⚠️ Important: Using the Correct Video Key** + +The ``video_key`` changes and you must use the correct key depending on when you're performing the replacement: + +- **Scenario 1: Replacing after initial file upload (before creating flow)** + + - Use the ``video_key`` returned from the upload file request response. + +- **Scenario 2: Replacing after creating the flow (before publishing)** + + - Use the ``key`` value from the Create Flow response. + + This is required because the backend **renames the uploaded file** to distinguish it from automatically generated subformat files. 
+ +- **Scenario 3: Replacing after publishing the video** + + - First make an edit request to modify the published video. + + - ``POST {{baseURL}}/api/deposits/video/{{deposit_id}}/actions/edit`` + + - Find the master file key from the response: + + .. code-block:: javascript + + let files = data.metadata?._files || []; + // Find the master file + let masterFile = files.find(f => f.context_type === "master"); + video_key = masterFile.key; + + - Use this ``video_key`` for the replacement request. + + +Do **not** use the original video file name (``video_name``) for replacement requests, +as this will not work due to the backend file renaming process. + +After finding the correct key, you can upload your new file (see `Step 3 <#step-3-upload-the-video>`_). + +Then, start the flow again using the new main video file, along with the updated ``version_id`` and ``video_key``. +You can follow the same structure outlined in `Step 4 <#step-4-create-a-flow>`_. diff --git a/cds/config.py b/cds/config.py index 770a5e5c3..5a31302a0 100644 --- a/cds/config.py +++ b/cds/config.py @@ -290,6 +290,7 @@ def _parse_env_bool(var_name, default=None): SEARCH_UI_VIDEO_MEDIUM = "templates/cds/video/featured-medium.html" # Angular template for small size (used for search results) SEARCH_UI_VIDEO_SMALL = "templates/cds/video/small.html" +HOMEPAGE_VIDEO_SMALL = "templates/cds/video/small_video_card.html" # Invenio-Stats # ============= @@ -999,6 +1000,44 @@ def _parse_env_bool(var_name, default=None): }, ] +HOMEPAGE_DYNAMIC_QUERIES = [ + { + "label": "LATEST LECTURES", + "query": "/api/records/?size=4&sort=mostrecent&q=collections:Lectures", + "qs": "collections=Lectures", + "subcategories": [ + { + "label": "Academic Training Lectures", + "qs": 'collections="Lectures::Academic Training Lectures"', + }, + { + "label": "CERN Accelerator School Lectures", + "qs": 'collections="Lectures::CERN Accelerator School"', + }, + { + "label": "E-Learning", + "qs": 'collections="Lectures::E-learning 
modules"', + }, + { + "label": "Conference records", + "qs": 'collections="Lectures::Talks, Seminars and Other Events,Conference records"', + }, + { + "label": "Scientific Seminars and Workshops", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Scientific Seminars and Workshops"', + }, + { + "label": "Teacher Programmes", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Teacher Programmes"', + }, + { + "label": "Student Lectures", + "qs": 'collections="Lectures::Talks, Seminars and Other Events::Student Lectures"', + }, + ], + } +] + FRONTPAGE_TREND_TOPICS = [ { "label": "Antimatter", @@ -1045,6 +1084,11 @@ def _parse_env_bool(var_name, default=None): "qs": 'q=keywords.name:"VNR" OR keywords.name:"video news release"', }, ] +############################################################################### +# Record Landing page +############################################################################### + +DESCRIPTION_PREVIEW_LINES = 10 ############################################################################### # Security @@ -1088,7 +1132,7 @@ def _parse_env_bool(var_name, default=None): "'unsafe-inline'", ], "img-src": ["'self'", "https://*.theoplayer.com", "data:"], - "connect-src": ["'self'", "https://*.theoplayer.com", "https://*.cern.ch"], + "connect-src": ["'self'", "https://*.theoplayer.com", "https://*.cern.ch", "blob:"], "object-src": ["'self'"], "media-src": ["'self'", "blob:"], "frame-src": ["'self'", "https://*.theoplayer.com"], @@ -1129,7 +1173,7 @@ def _parse_env_bool(var_name, default=None): "https://auth.cern.ch/auth/realms/cern/protocol/openid-connect/userinfo", ) -OAUTHCLIENT_CERN_OPENID_ALLOWED_ROLES = ["cern-user"] +OAUTHCLIENT_CERN_OPENID_ALLOWED_ROLES = ["cern-user", "authenticated-user"] OAUTHCLIENT_CERN_OPENID_REFRESH_TIMEDELTA = timedelta(minutes=-5) """Default interval for refreshing CERN extra data (e.g. groups). 
@@ -1258,6 +1302,7 @@ def _parse_env_bool(var_name, default=None): # Licence key and base URL for THEO player THEOPLAYER_LIBRARY_LOCATION = None THEOPLAYER_LICENSE = None + # Wowza server URL for m3u8 playlist generation WOWZA_PLAYLIST_URL = ( "https://wowza.cern.ch/cds/_definist_/smil:" "{filepath}/playlist.m3u8" @@ -1633,3 +1678,6 @@ def _parse_env_bool(var_name, default=None): # Sets the location to share the video files among the different tasks CDS_FILES_TMP_FOLDER = "/tmp/videos" + +# TODO: needs latest files-rest enabling range requests +FILES_REST_ALLOW_RANGE_REQUESTS = True diff --git a/cds/modules/deposit/api.py b/cds/modules/deposit/api.py index 4c19ec5c3..089fdbea9 100644 --- a/cds/modules/deposit/api.py +++ b/cds/modules/deposit/api.py @@ -65,6 +65,7 @@ get_tasks_status_grouped_by_task_name, merge_tasks_status, ) +from ..flows.tasks import ExtractChapterFramesTask from ..flows.models import FlowMetadata from ..invenio_deposit.api import Deposit, has_status, preserve from ..invenio_deposit.utils import mark_as_action @@ -76,7 +77,7 @@ ) from ..records.minters import cds_doi_generator, is_local_doi, report_number_minter from ..records.resolver import record_resolver -from ..records.utils import is_record, lowercase_value +from ..records.utils import is_record, lowercase_value, parse_video_chapters, get_existing_chapter_frame_timestamps from ..records.validators import PartialDraft4Validator from ..records.permissions import is_public from .errors import DiscardConflict @@ -504,7 +505,7 @@ def create(cls, data, id_=None, **kwargs): data.setdefault("_access", {}) access_update = data["_access"].setdefault("update", []) try: - if current_user.email not in access_update: + if current_user.email not in access_update: # Add the current user to the ``_access.update`` list access_update.append(current_user.email) except AttributeError: @@ -869,7 +870,7 @@ def _rename_subtitles(self): ) # copy tags to the newly created object version for tag in subtitle_obj.tags: 
- tag.object_version = obj + ObjectVersionTag.create_or_update(obj, tag.key, tag.value) subtitle_obj.remove() def _rename_master_file(self, master_file): @@ -905,11 +906,78 @@ def _publish_edited(self): return super(Video, self)._publish_edited() + def _has_chapters_changed(self, old_record=None): + """Check if chapters in description have changed.""" + current_description = self.get("description", "") + current_chapters = parse_video_chapters(current_description) + + if old_record is None: + # First publish - trigger if chapters exist + return len(current_chapters) > 0 + + old_description = old_record.get("description", "") + old_chapters = parse_video_chapters(old_description) + + # Compare chapter timestamps and titles + if len(current_chapters) != len(old_chapters): + return True + + for curr, old in zip(current_chapters, old_chapters): + if curr["seconds"] != old["seconds"] or curr["title"] != old["title"]: + return True + + if len(current_chapters) != len(get_existing_chapter_frame_timestamps(self)): + # Chapters did not change, but chapter frames doesn't exist + return True + + return False + + def _trigger_chapter_frame_extraction(self): + """Trigger chapter frame extraction asynchronously for existing video files.""" + try: + # Get the current flow for this deposit + current_flow = FlowMetadata.get_by_deposit(self["_deposit"]["id"]) + + if current_flow is None: + current_app.logger.warning( + f"No current flow found for video {self.id}. Cannot trigger chapter frame extraction." 
+ ) + return + + current_app.logger.info( + f"Triggering asynchronous ExtractChapterFramesTask for video {self.id} with flow {current_flow.id}" + ) + + payload = current_flow.payload.copy() + + current_app.logger.info(f"Submitting ExtractChapterFramesTask with payload: {payload}") + + ExtractChapterFramesTask().s(**payload).apply_async() + + current_app.logger.info( + f"ExtractChapterFramesTask submitted asynchronously for video {self.id}, flow_id: {current_flow.id}" + ) + except Exception as e: + current_app.logger.error( + f"Failed to trigger async chapter frame extraction for video {self.id}: {e}" + ) + import traceback + + current_app.logger.error(f"Traceback: {traceback.format_exc()}") + @mark_as_action - def publish(self, pid=None, id_=None, **kwargs): + def publish(self, pid=None, id_=None, extract_chapters=True, **kwargs): """Publish a video and update the related project.""" # save a copy of the old PID video_old_id = self["_deposit"]["id"] + + # Check if this is a republish and get the old record + old_record = None + try: + _, old_record = self.fetch_published() + except KeyError as e: # First publish (no pid key) + pass + try: self["category"] = self.project["category"] self["type"] = self.project["type"] @@ -930,6 +998,13 @@ def publish(self, pid=None, id_=None, **kwargs): video_published = super(Video, self).publish(pid=pid, id_=id_, **kwargs) _, record_new = self.fetch_published() + # Check if chapters have changed and trigger frame extraction + if extract_chapters and self._has_chapters_changed(old_record): + current_app.logger.info( + f"Chapters changed for video {self.id}, triggering frame extraction" + ) + self._trigger_chapter_frame_extraction() + # update associated project video_published.project._update_videos( [video_build_url(video_old_id)], @@ -1088,7 +1163,6 @@ def _create_tags(self): except IndexError: return - def mint_doi(self): """Mint DOI.""" assert self.has_record() @@ -1109,7 +1183,7 @@ def mint_doi(self): 
status=PIDStatus.RESERVED, ) return self - + project_resolver = Resolver( pid_type="depid", diff --git a/cds/modules/deposit/ext.py b/cds/modules/deposit/ext.py index 0675488b1..f4a9d6466 100644 --- a/cds/modules/deposit/ext.py +++ b/cds/modules/deposit/ext.py @@ -25,11 +25,13 @@ """CDSDeposit app for Webhook receivers.""" import re +import mimetypes from invenio_base.signals import app_loaded from invenio_db import db from invenio_files_rest.models import ObjectVersionTag from invenio_files_rest.signals import file_uploaded +from invenio_files_rest.errors import InvalidKeyError from invenio_indexer.signals import before_record_index from invenio_records_files.utils import sorted_files_from_bucket @@ -45,38 +47,37 @@ def _create_tags(obj): """Create additional tags for file.""" - # Subtitle file - pattern = re.compile(".*_([a-zA-Z]{2})\.vtt$") + pattern_subtitle = re.compile(r".*_([a-zA-Z]{2})\.vtt$") + pattern_poster = re.compile(r"^poster\.(jpg|png)$") + + # Get the media_type and content_type(file ext) + file_name = obj.key + mimetypes.add_type("subtitle/vtt", ".vtt") + guessed_type = mimetypes.guess_type(file_name)[0] + if guessed_type is None: + raise InvalidKeyError(description=f"Unsupported File: {file_name}") + + media_type = guessed_type.split("/")[0] + file_ext = guessed_type.split("/")[1] + with db.session.begin_nested(): - # language tag - found = pattern.findall(obj.key) - if len(found) == 1: - lang = found[0] - ObjectVersionTag.create_or_update(obj, "language", lang) - else: - # clean to be sure there is no some previous value - ObjectVersionTag.delete(obj, "language") - # other tags - ObjectVersionTag.create_or_update(obj, "content_type", "vtt") - ObjectVersionTag.create_or_update(obj, "context_type", "subtitle") - ObjectVersionTag.create_or_update(obj, "media_type", "subtitle") - # refresh object - db.session.add(obj) + ObjectVersionTag.create_or_update(obj, "content_type", file_ext) + ObjectVersionTag.create_or_update(obj, "media_type", media_type) 
+ if file_ext == "vtt": + # language tag + match = pattern_subtitle.search(file_name) + if match: + ObjectVersionTag.create_or_update(obj, "language", match.group(1)) + else: + ObjectVersionTag.delete(obj, "language") + # other tags + ObjectVersionTag.create_or_update(obj, "content_type", "vtt") + ObjectVersionTag.create_or_update(obj, "context_type", "subtitle") + # poster tag + elif pattern_poster.match(file_name): + ObjectVersionTag.create_or_update(obj, "context_type", "poster") - # Poster frame - pattern = re.compile("^poster\.(jpg|png)$") - try: - poster = pattern.findall(obj.key) - if poster: - ext = pattern.findall(poster.key)[0] - # frame tags - ObjectVersionTag.create_or_update(poster, "content_type", ext) - ObjectVersionTag.create_or_update(poster, "context_type", "poster") - ObjectVersionTag.create_or_update(poster, "media_type", "image") - # refresh object - db.session.add(poster) - except IndexError: - return + db.session.add(obj) def create_tags_on_file_upload(sender, obj): diff --git a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json index ae4cbfb87..01969cb3c 100644 --- a/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/mappings/os-v2/deposits/records/videos/video/video-v1.0.0.json @@ -228,6 +228,9 @@ "recid": { "type": "double" }, + "legacy_recid": { + "type": "double" + }, "license": { "properties": { "license": { @@ -333,6 +336,133 @@ "type": "keyword" } } + }, + "alternate_identifiers": { + "properties": { + "scheme": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "_curation": { + "type": "object", + "properties": { + "legacy_report_number": { + "type": "keyword" + }, + "department": { + "type": "keyword" + }, + "volumes": { + "type": "text" + }, + "physical_location": { + "type": "text" + }, + "physical_medium": { + "type": "text" + }, + 
"internal_note": { + "type": "text" + }, + "digitized": { + "type": "object", + "properties": { + "url": { + "type": "text" + }, + "format": { + "type": "text" + }, + "link_text": { + "type": "text" + }, + "public_note": { + "type": "text" + }, + "nonpublic_note": { + "type": "text" + }, + "md5_checksum": { + "type": "text" + }, + "source": { + "type": "text" + } + } + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "type": "text" + }, + "336": { + "type": "text" + }, + "583": { + "type": "text" + }, + "306": { + "type": "text" + } + } + } + } + }, + "additional_titles": { + "type": "object", + "properties": { + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "additional_descriptions": { + "type": "object", + "properties": { + "description": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "related_identifiers": { + "type": "object", + "properties": { + "identifier": { + "type": "text" + }, + "scheme": { + "type": "keyword" + }, + "relation_type": { + "type": "keyword" + }, + "resource_type": { + "type": "keyword" + } + } + }, + "collections": { + "type": "keyword" + }, + "additional_languages": { + "type": "text" } } } diff --git a/cds/modules/deposit/receivers.py b/cds/modules/deposit/receivers.py index 18727fec7..d5949a39d 100644 --- a/cds/modules/deposit/receivers.py +++ b/cds/modules/deposit/receivers.py @@ -33,6 +33,7 @@ from cds.modules.flows.tasks import ( DownloadTask, ExtractFramesTask, + ExtractChapterFramesTask, ExtractMetadataTask, TranscodeVideoTask, ) @@ -87,4 +88,5 @@ def register_celery_class_based_tasks(sender, app=None): celery.register_task(ExtractMetadataTask()) celery.register_task(DownloadTask()) celery.register_task(ExtractFramesTask()) + celery.register_task(ExtractChapterFramesTask()) celery.register_task(TranscodeVideoTask()) diff --git 
a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json index 2e91fa703..0729bb117 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/project/project-v1.0.0.json @@ -142,6 +142,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -150,7 +151,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json new file mode 100644 index 000000000..e1c217180 --- /dev/null +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/definitions-v1.0.0.json @@ -0,0 +1,57 @@ +{ + "languages": { + "enum": [ + "ar", + "ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } +} diff --git a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json index 8c84e84bd..ae15d3d3c 100644 --- a/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json +++ b/cds/modules/deposit/schemas/deposits/records/videos/video/video-v1.0.0.json @@ -62,6 +62,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -70,7 +71,9 @@ "Narrator", "Photography", "Producer", + 
"RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", @@ -190,6 +193,24 @@ "description": "List of identifiers on external systems.", "title": "External identifiers" }, + "alternate_identifiers": { + "items": { + "properties": { + "scheme": { + "title": "Scheme of the identifier (Vocabulary)", + "type": "string", + "enum": ["URL", "DOI", "CDS"] + }, + "value": { + "title": "Value of the identifier", + "type": "string" + } + } + }, + "required": ["value", "scheme"], + "title": "List of alternate identifiers of the record", + "type": "array" + }, "$schema": { "type": "string" }, @@ -441,59 +462,7 @@ }, "language": { "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string", "description": "A language of the resource." }, @@ -563,59 +532,7 @@ "language": { "description": "A language of the resource.", "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string" }, "license": { @@ -696,11 +613,218 @@ "type": "number", "description": "Invenio record identifier (integer)." 
}, + "legacy_recid": { + "type": "number", + "description": "Legacy record identifier (integer). Kept for auditing reasons." + }, "original_source": { "type": "string" }, "_project_id": { "type": "string" + }, + "_curation": { + "properties": { + "legacy_report_number": { + "title": "Legacy record report number.", + "type": "array", + "items": { + "type": "string" + } + }, + "department": { + "title": "CERN department.", + "type": "string" + }, + "volumes": { + "title": "Volume list for this record.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_location": { + "title": "Tag 852 physical location.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_medium": { + "title": "Tag 340 physical medium.", + "type": "array", + "items": { + "type": "string" + } + }, + "internal_note": { + "title": "Tag 595 internal note.", + "type": "array", + "items": { + "type": "string" + } + }, + "digitized": { + "title": "Digitized metadata.", + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } + } + } + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "title": "Tag 964.", + "type": "array", + "items": { + "type": "string" + } + }, + "336": { + "title": "Tag 336.", + "type": "array", + "items": { + "type": "string" + } + }, + "583": { + "title": "Tag 583.", + "type": "array", + "items": { + "type": "string" + } + }, + "306": { + "title": "Tag 306.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "title": "Fields that needs curation.", + "description": "This section contains MARC21 metadata fields that could not be mapped during weblectures migration.", + "type": "object" + }, + 
"additional_titles": { + "description": "Additional record titles.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "title": { + "description": "Additional title of the record.", + "type": "string" + }, + "type": { + "type": "string", + "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"] + }, + "lang": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } + } + } + }, + "additional_descriptions": { + "description": "Additional descriptions for the record.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": { + "type": "string", + "description": "Descriptive content." + }, + "type": { + "type": "string", + "enum": [ + "Abstract", + "Methods", + "Other", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo" + ] + }, + "lang": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } + } + } + }, + "related_identifiers": { + "type": "array", + "items": { + "type": "object", + "required": ["identifier", "scheme", "relation_type"], + "additionalProperties": false, + "properties": { + "identifier": { + "type": "string", + "description": "The actual identifier (e.g., URL or DOI)." + }, + "scheme": { + "type": "string", + "enum": ["URL", "DOI", "CDS", "Indico"], + "description": "The scheme describing the identifier type." + }, + "relation_type": { + "type": "string", + "enum": ["IsPartOf", "IsVariantFormOf"], + "description": "Describes the relationship with the current record." + }, + "resource_type": { + "type": "string", + "enum": ["Event", "ConferencePaper", "Report", "Book"], + "description": "Type of the related resource." 
+ } + } + } + }, + "collections": { + "items": { + "type": "string" + }, + "type": "array" + }, + "additional_languages": { + "description": "Additional languages for the record.", + "type": "array", + "items": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } } } } \ No newline at end of file diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/project.json b/cds/modules/deposit/static/json/cds_deposit/forms/project.json index 254ffeb66..b80c100fe 100644 --- a/cds/modules/deposit/static/json/cds_deposit/forms/project.json +++ b/cds/modules/deposit/static/json/cds_deposit/forms/project.json @@ -160,6 +160,10 @@ "value": "Co-Producer", "name": "Co-Producer" }, + { + "value": "ContactPerson", + "name": "Contact Person" + }, { "value": "Creator", "name": "Creator" @@ -192,10 +196,18 @@ "value": "Producer", "name": "Producer" }, + { + "value": "RelatedPerson", + "name": "Related Person" + }, { "value": "Reporter", "name": "Reporter" }, + { + "value": "ResearchGroup", + "name": "Research Group" + }, { "value": "Screenwriter", "name": "Screenwriter" diff --git a/cds/modules/deposit/static/json/cds_deposit/forms/video.json b/cds/modules/deposit/static/json/cds_deposit/forms/video.json index c90b9884b..1d5448dd2 100644 --- a/cds/modules/deposit/static/json/cds_deposit/forms/video.json +++ b/cds/modules/deposit/static/json/cds_deposit/forms/video.json @@ -292,6 +292,10 @@ "value": "Co-Producer", "name": "Co-Producer" }, + { + "value": "ContactPerson", + "name": "Contact Person" + }, { "value": "Creator", "name": "Creator" @@ -324,10 +328,18 @@ "value": "Producer", "name": "Producer" }, + { + "value": "RelatedPerson", + "name": "Related Person" + }, { "value": "Reporter", "name": "Reporter" }, + { + "value": "ResearchGroup", + "name": "Research Group" + }, { "value": "Screenwriter", "name": "Screenwriter" @@ -555,17 +567,143 @@ ], "related_links": [ { - "key": "related_links", + "key": "related_identifiers", "type": "array", - "add": "Add 
related links", + "add": "Add related identifiers", + "title": "Related Identifiers", + "description": "Add identifiers for related resources such as DOIs, URLs, or Indico event IDs.", "items": [ { - "title": "Name", - "key": "related_links[].name" + "title": "Identifier", + "key": "related_identifiers[].identifier", + "required": true, + "placeholder": "e.g., 10.1234/example.doi, https://example.com, 12345", + "description": "The identifier value (DOI, URL, or Indico event ID)" + }, + { + "title": "Scheme", + "key": "related_identifiers[].scheme", + "type": "select", + "required": true, + "placeholder": "Select identifier scheme", + "description": "The type of identifier scheme", + "titleMap": [ + { + "value": "URL", + "name": "URL (Uniform Resource Locator)" + }, + { + "value": "DOI", + "name": "DOI (Digital Object Identifier)" + }, + { + "value": "Indico", + "name": "Indico (Event ID)" + }, + { + "value": "CDS", + "name": "CDS (CERN Document Server Record ID)" + } + ] + }, + { + "title": "Relation Type", + "key": "related_identifiers[].relation_type", + "type": "select", + "required": true, + "placeholder": "Select relation type", + "description": "How this resource relates to the identified resource", + "titleMap": [ + { + "value": "IsPartOf", + "name": "Is part of" + }, + { + "value": "IsVariantFormOf", + "name": "Is variant form of" + } + ] }, { - "title": "URL", - "key": "related_links[].url" + "title": "Resource Type", + "key": "related_identifiers[].resource_type", + "type": "select", + "placeholder": "Select resource type (optional)", + "description": "The type of the related resource (optional)", + "titleMap": [ + { + "value": "Audiovisual", + "name": "Audiovisual" + }, + { + "value": "Book", + "name": "Book" + }, + { + "value": "Collection", + "name": "Collection" + }, + { + "value": "ConferencePaper", + "name": "Conference Paper" + }, + { + "value": "DataPaper", + "name": "Data Paper" + }, + { + "value": "Dataset", + "name": "Dataset" + }, + { + 
"value": "Event", + "name": "Event" + }, + { + "value": "Image", + "name": "Image" + }, + { + "value": "InteractiveResource", + "name": "Interactive Resource" + }, + { + "value": "Model", + "name": "Model" + }, + { + "value": "PhysicalObject", + "name": "Physical Object" + }, + { + "value": "Report", + "name": "Report" + }, + { + "value": "Service", + "name": "Service" + }, + { + "value": "Software", + "name": "Software" + }, + { + "value": "Sound", + "name": "Sound" + }, + { + "value": "Text", + "name": "Text" + }, + { + "value": "Workflow", + "name": "Workflow" + }, + { + "value": "Other", + "name": "Other" + } + ] } ] } diff --git a/cds/modules/deposit/static/templates/cds_deposit/deposits.html b/cds/modules/deposit/static/templates/cds_deposit/deposits.html index bb9c37b8a..1815ee009 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/deposits.html +++ b/cds/modules/deposit/static/templates/cds_deposit/deposits.html @@ -39,6 +39,7 @@
Tips

Click here to select videos to upload

You can also Drag & Drop video files here

+

supported files {{ $ctrl.videoExtensions }}

diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html index 3e3fe027c..bd606dfa7 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html +++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/form.html @@ -206,7 +206,7 @@

  • - Related links + Related information
  • diff --git a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html index 872f8f518..829343572 100644 --- a/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html +++ b/cds/modules/deposit/static/templates/cds_deposit/types/video/uploader.html @@ -9,7 +9,7 @@
    - Master & Subformats files + Main & Subformats files
    @@ -224,12 +224,12 @@
    Tips and suggestions
    - -
    + +
    - Other files + Additional files
    -
    +

    @@ -242,7 +242,7 @@

    Tips and suggestions
    - +
    @@ -271,6 +271,32 @@
    Tips and suggestions
    Filename Size
    + +
    +
    +
    +
    +

    + +

    +
    +

    Upload complimentary files for this video

    +

    Or Drag & Drop files

    +
    +
    +
    +
    +
    +

    @@ -283,39 +309,43 @@
    Tips and suggestions
    -
    -
    -
    -

    - -

    -
    -
    -

    Upload complimentary files for this video

    + + +
    +
    + Replace Video File +
    +
    +
    +
    +
    +

    + +

    +
    +

    To replace the video file, just upload a video here.

    +

    Or Drag & Drop files

    +
    -

    Or Drag & Drop files

    -
    -
    -
    Tips and suggestions
    -
      -
    • To replace the video file, just upload another video.
    • -
    -
    + +
    Tips and suggestions
    • Click the Edit button on the top right corner to add more files.
    -
    +
    \ No newline at end of file diff --git a/cds/modules/deposit/views.py b/cds/modules/deposit/views.py index 1d880ed65..da8b9a7c9 100644 --- a/cds/modules/deposit/views.py +++ b/cds/modules/deposit/views.py @@ -25,6 +25,7 @@ """CDS interface.""" +from cds.modules.ldap.decorators import require_upload_permission from flask import ( Blueprint, abort, @@ -118,6 +119,7 @@ def to_links_js(pid, deposit=None, dep_type=None): @blueprint.route("/deposit/reportnumbers/new", methods=["GET", "POST"]) @login_required +@require_upload_permission() def reserve_report_number(): """Form to reserver a new report number.""" if not has_read_record_eos_path_permission(current_user, None): @@ -156,6 +158,7 @@ def reserve_report_number(): "/deposit/reportnumbers/assign/", methods=["GET", "POST"] ) @login_required +@require_upload_permission() def assign_report_number(depid): """Form to reserver a new report number.""" if not has_read_record_eos_path_permission(current_user, None): diff --git a/cds/modules/fixtures/data/categories.json b/cds/modules/fixtures/data/categories.json index 015ea1951..1b100c679 100644 --- a/cds/modules/fixtures/data/categories.json +++ b/cds/modules/fixtures/data/categories.json @@ -95,5 +95,18 @@ "_access": { "read": ["alice-secretariat@cern.ch"] } + }, + { + "name": "LECTURES", + "types": ["VIDEO"], + "access": { + "public": true, + "restricted": [], + "responsible": ["weblecture-service@cern.ch"] + }, + "_record_type": ["PROJECT"], + "_access": { + "read": ["weblecture-service@cern.ch"] + } } ] diff --git a/cds/modules/fixtures/data/pages/guides/search.html b/cds/modules/fixtures/data/pages/guides/search.html index af221fe89..2bc11082d 100644 --- a/cds/modules/fixtures/data/pages/guides/search.html +++ b/cds/modules/fixtures/data/pages/guides/search.html @@ -371,6 +371,8 @@

    Contributor roles (CV)

  • Comments by
  • +
  • Contact Person
  • +
  • Co-Producer
  • Creator
  • @@ -389,8 +391,12 @@

    Contributor roles (CV)

  • Producer
  • +
  • Related Person
  • +
  • Reporter
  • +
  • Research Group
  • +
  • Screenwriter
  • Speaker
  • diff --git a/cds/modules/flows/api.py b/cds/modules/flows/api.py index 136bef012..09acbaadf 100644 --- a/cds/modules/flows/api.py +++ b/cds/modules/flows/api.py @@ -39,6 +39,7 @@ from .tasks import ( CeleryTask, DownloadTask, + ExtractChapterFramesTask, ExtractFramesTask, ExtractMetadataTask, TranscodeVideoTask, @@ -245,6 +246,7 @@ def _find_celery_task_by_name(name): ExtractMetadataTask, ExtractFramesTask, TranscodeVideoTask, + ExtractChapterFramesTask, ]: if celery_task.name == name: return celery_task diff --git a/cds/modules/flows/tasks.py b/cds/modules/flows/tasks.py index eaa97952a..a3c2009ea 100644 --- a/cds/modules/flows/tasks.py +++ b/cds/modules/flows/tasks.py @@ -28,6 +28,7 @@ import shutil import signal import tempfile +from io import BytesIO import jsonpatch import requests @@ -57,12 +58,12 @@ from cds.modules.flows.models import FlowTaskMetadata from cds.modules.flows.models import FlowTaskStatus as FlowTaskStatus - +from cds.modules.records.api import CDSVideosFilesIterator from ..ffmpeg import ff_frames, ff_probe_all from ..opencast.api import OpenCast from ..opencast.error import RequestError from ..opencast.utils import get_qualities -from ..records.utils import to_string +from ..records.utils import to_string, parse_video_chapters, get_existing_chapter_frame_timestamps from ..xrootd.utils import file_opener_xrootd from .deposit import index_deposit_project from .files import dispose_object_version, move_file_into_local @@ -197,7 +198,9 @@ def _meta_exception_envelope(self, exc): NOTE: workaround to be able to save the payload in celery in case of exceptions. 
""" - meta = dict(message=str(exc), payload=self._base_payload) + # Safety check in case base payload is not set yet + payload = getattr(self, '_base_payload', {}) + meta = dict(message=str(exc), payload=payload) return dict(exc_message=meta, exc_type=exc.__class__.__name__) def on_failure(self, exc, task_id, args, kwargs, einfo): @@ -223,7 +226,16 @@ def on_success(self, exc, task_id, args, kwargs): def _reindex_video_project(self): """Reindex video and project.""" with celery_app.flask_app.app_context(): - deposit_id = self._base_payload["deposit_id"] + # Safety check in case base payload is not set yet + if not hasattr(self, '_base_payload') or not self._base_payload or 'deposit_id' not in self._base_payload: + if hasattr(self, 'deposit_id') and self.deposit_id: + deposit_id = self.deposit_id + else: + self.log("Cannot reindex: deposit_id not available") + return + else: + deposit_id = self._base_payload["deposit_id"] + try: index_deposit_project(deposit_id) except PIDDeletedError: @@ -590,10 +602,10 @@ def progress_updater(current_frame): object_=self.object_version, output_dir=output_folder, progress_updater=progress_updater, - **options + **options, ), object_=self.object_version, - **options + **options, ) except Exception: db.session.rollback() @@ -601,6 +613,8 @@ def progress_updater(current_frame): self.clean(version_id=self.object_version_id) raise + total_frames = len(frames) + # Generate GIF images self._create_gif( bucket=str(self.object_version.bucket.id), @@ -618,7 +632,7 @@ def progress_updater(current_frame): db.session.commit() self.log("Finished task {0}".format(kwargs["task_id"])) - return "Created {0} frames.".format(len(frames)) + return "Created {0} frames.".format(total_frames) @classmethod def _time_position(cls, duration, frames_start=5, frames_end=95, frames_gap=10): @@ -648,7 +662,7 @@ def _create_tmp_frames( duration, output_dir, progress_updater=None, - **kwargs + **kwargs, ): """Create frames in temporary files.""" # Generate frames 
@@ -727,6 +741,276 @@ def _create_object( [ObjectVersionTag.create(obj, k, to_string(tags[k])) for k in tags] +class ExtractChapterFramesTask(AVCTask): + """Extract chapter frames task - dedicated task for chapter frame extraction only.""" + + name = "file_video_extract_chapter_frames" + + @staticmethod + def clean(version_id, valid_chapter_seconds=None, *args, **kwargs): + """Delete generated chapter frame ObjectVersion slaves. + + - If valid_chapter_seconds is given, keep them. + - If not, remove all chapter frames. + """ + valid_chapter_seconds = valid_chapter_seconds or [] + # remove all objects version "slave" with type "frame" that are chapter frames + tag_alias_1 = aliased(ObjectVersionTag) + tag_alias_2 = aliased(ObjectVersionTag) + tag_alias_3 = aliased(ObjectVersionTag) + + slaves = ( + ObjectVersion.query.join(tag_alias_1, ObjectVersion.tags) + .join(tag_alias_2, ObjectVersion.tags) + .join(tag_alias_3, ObjectVersion.tags) + .filter(tag_alias_1.key == "master", tag_alias_1.value == version_id) + .filter(tag_alias_2.key == "context_type", tag_alias_2.value == "frame") + .filter(tag_alias_3.key == "is_chapter_frame", tag_alias_3.value == "true") + .all() + ) + + for slave in slaves: + ts_val = next(t.value for t in slave.tags if t.key == "timestamp") + if ts_val in valid_chapter_seconds: + continue + dispose_object_version(slave) + + # If no valid chapter seconds, remove the chapters.vtt file + if not valid_chapter_seconds: + master_obj = ObjectVersion.query.get(version_id) + vtt_objs = ObjectVersion.get_versions(master_obj.bucket_id, "chapters.vtt") + for vtt_obj in vtt_objs: + dispose_object_version(vtt_obj) + + def run(self, *args, **kwargs): + """Extract frames only at chapter timestamps from video description. + + This task is specifically designed to extract frames for chapters only, + without affecting other frame extraction processes. 
+ + The task receives parameters through the standard AVCTask initialization: + - self.deposit_id: The deposit ID containing the video description + - self.object_version: The ObjectVersion of the master video file + - self.flow_id: The current flow ID for task metadata integration + """ + + # Create or update the TaskMetadata + flow_task_metadata = self.get_or_create_flow_task() + kwargs["celery_task_id"] = str(self.request.id) + kwargs["task_id"] = str(flow_task_metadata.id) + flow_task_metadata.payload = self.get_full_payload(**kwargs) + flow_task_metadata.status = FlowTaskStatus.STARTED + flow_task_metadata.message = "" + db.session.commit() + + self.log("Started task {0}".format(kwargs["task_id"])) + + output_folder = tempfile.mkdtemp() + + # Remove temporary directory on abrupt execution halts. + self.set_revoke_handler( + lambda: shutil.rmtree(output_folder, ignore_errors=True) + ) + + def progress_updater(current_chapter): + """Progress reporter.""" + percentage = current_chapter / len(chapters) * 100 + meta = dict( + payload=dict(size=len(chapters), percentage=percentage), + message="Extracting chapter frames [{0} out of {1}]".format( + current_chapter, len(chapters) + ), + ) + self.log(meta["message"]) + + bucket_was_locked = False + if self.object_version.bucket.locked: + # If record was published we need to unlock the bucket + bucket_was_locked = True + self.object_version.bucket.locked = False + + try: + # Get the deposit to access the description + from cds.modules.deposit.api import deposit_video_resolver + db.session.refresh(self.object_version) + deposit_video = deposit_video_resolver(self.deposit_id) + description = deposit_video.get("description", "") + + self.log("Found description with {0} characters".format(len(description))) + + # Parse chapters from description + chapters = parse_video_chapters(description) + + self.log("Found {0} chapters in description".format(len(chapters))) + + # Get video duration from metadata + duration = 
float(self._base_payload.get("tags", {}).get("duration", 0)) + + if duration == 0: + raise ValueError("Video duration is 0 - cannot extract frames") + + # Check which timestamps already have frames + existing_timestamps = get_existing_chapter_frame_timestamps(deposit_video) + + frames, chapter_seconds = self._create_chapter_frames( + chapters=chapters, + duration=duration, + object_=self.object_version, + output_dir=output_folder, + existing_timestamps=existing_timestamps, + progress_updater=progress_updater, + ) + + # Clean unused chapters + self.clean(version_id=self.object_version_id, valid_chapter_seconds=chapter_seconds) + + # Create or update WebVTT file for chapters + self._build_chapter_vtt(chapters, duration) + + # Sync deposit and record files + sync_records_with_deposit_files(self.deposit_id) + + except Exception: + db.session.rollback() + shutil.rmtree(output_folder, ignore_errors=True) + self.clean(version_id=self.object_version_id) + raise + + total_frames = len(frames) + + if bucket_was_locked: + # Lock the bucket again + self.object_version.bucket.locked = True + + # Cleanup + shutil.rmtree(output_folder) + + self.log("Finished task {0}".format(kwargs["task_id"])) + return "Created {0} chapter frames.".format(total_frames) + + @classmethod + def _create_chapter_frames( + cls, + chapters, + duration, + object_, + output_dir, + existing_timestamps, + progress_updater=None, + ): + """Create frames for chapters that don't already exist at those timestamps.""" + created_frames = [] + valid_chapter_seconds = [] + current_chapter = 0 + + with move_file_into_local(object_, delete=True) as url: + for chapter in chapters: + current_chapter += 1 + + if progress_updater: + progress_updater(current_chapter) + + chapter_seconds = chapter["seconds"] + chapter_title = chapter["title"] + + # Skip chapters that are beyond video duration + if chapter_seconds > duration: + continue + + # For 0:00 chapters, use a small offset to avoid extraction issues + chapter_seconds 
= max(chapter_seconds, 0.1) if chapter_seconds == 0 else chapter_seconds + valid_chapter_seconds.append(to_string(chapter_seconds)) + + # Skip if frame already exists at this timestamp (with some tolerance) + timestamp_exists = any( + abs(existing_ts - chapter_seconds) < 0.1 + for existing_ts in existing_timestamps + ) + if timestamp_exists: + continue + + frame_filename = "chapter-{0}.jpg".format(int(chapter_seconds)) + frame_path = os.path.join(output_dir, frame_filename) + + # Ensure we don't exceed duration + if chapter_seconds + 0.01 >= duration: + chapter_seconds = max(0, duration - 0.02) + try: + # Extract single frame at chapter timestamp using ff_frames + ff_frames( + input_file=url, + start=chapter_seconds, + end=chapter_seconds + 0.01, # Extract just one frame + step=1, + duration=duration, + output=frame_path, + ) + + if os.path.exists(frame_path) and os.path.getsize(frame_path) > 0: + # Create ObjectVersion for chapter frame (as normal frame) + ExtractFramesTask._create_object( + bucket=object_.bucket, + key=frame_filename, + stream=file_opener_xrootd(frame_path, "rb"), + size=os.path.getsize(frame_path), + media_type="image", + context_type="frame", + master_id=object_.version_id, + is_chapter_frame=True, + timestamp=chapter_seconds, + ) + + created_frames.append(frame_path) + + except Exception as e: + # Log error but continue with other chapters + current_app.logger.error( + "Failed to extract frame for chapter at {0}s: {1}".format( + chapter_seconds, str(e) + ) + ) + continue + + return created_frames, valid_chapter_seconds + + def _build_chapter_vtt(self, chapters, duration): + """Build WebVTT content string from chapters list.""" + if not chapters: + return + vtt = "WEBVTT\n\n" + for i, c in enumerate(sorted(chapters, key=lambda x: x["seconds"])): + start = c["seconds"] + end = chapters[i+1]["seconds"] if i+1 < len(chapters) else duration + if end > duration: + end = duration + start_str = "{:02}:{:02}:{:02}.000".format( + int(start // 3600), + 
int((start % 3600) // 60), + int(start % 60) + ) + end_str = "{:02}:{:02}:{:02}.000".format( + int(end // 3600), + int((end % 3600) // 60), + int(end % 60) + ) + vtt += f"{i+1}\n{start_str} --> {end_str}\n{c['title']}\n\n" + + vtt_bytes = vtt.encode("utf-8") + vtt_key = "chapters.vtt" + + bucket = as_bucket(self.object_version.bucket.id) + obj = ObjectVersion.create( + bucket=bucket, + key=vtt_key, + stream=BytesIO(vtt_bytes), + size=len(vtt_bytes), + ) + ObjectVersionTag.create(obj, "media_type", "chapters") + ObjectVersionTag.create(obj, "context_type", "chapters") + ObjectVersionTag.create(obj, "content_type", "vtt") + self.log("Created chapters.vtt") + + class TranscodeVideoTask(AVCTask): """Transcode video task. @@ -793,7 +1077,7 @@ def _update_flow_tasks(self, flow_tasks, status, message, **kwargs): opencast_publication_tag=current_app.config["CDS_OPENCAST_QUALITIES"][ quality ]["opencast_publication_tag"], - **kwargs # may contain `opencast_event_id` + **kwargs, # may contain `opencast_event_id` ) # JSONb cols needs to be assigned (not updated) to be persisted flow_task_metadata.payload = new_payload @@ -848,7 +1132,7 @@ def _start_transcodable_flow_tasks_or_cancel(self, wanted_qualities=None): new_payload.update( task_id=str(t.id), celery_task_id=str(self.request.id), - **self._base_payload + **self._base_payload, ) # JSONb cols needs to be assigned (not updated) to be persisted t.payload = new_payload diff --git a/cds/modules/home/templates/cds_home/home.html b/cds/modules/home/templates/cds_home/home.html index 8be685422..69f4382fc 100644 --- a/cds/modules/home/templates/cds_home/home.html +++ b/cds/modules/home/templates/cds_home/home.html @@ -78,8 +78,53 @@

    {{ _('TRENDS') }} + + {% for section in config.HOMEPAGE_DYNAMIC_QUERIES %} +
    +
    +
    +

    + {{ _(section.label) }} +

    + + + {{ _('Explore all') }} + +
    + + {% if section.subcategories %} +
    + +
    + {% endif %} + +
    + + + + + +
    +
    +
    + {% endfor %} + + + -
    +
    diff --git a/cds/modules/home/views.py b/cds/modules/home/views.py index 9a70122db..968d341f0 100644 --- a/cds/modules/home/views.py +++ b/cds/modules/home/views.py @@ -25,6 +25,8 @@ from invenio_cache.decorators import cached_unless_authenticated from invenio_i18n import lazy_gettext as _ +from ..records.permissions import has_upload_permission + blueprint = Blueprint( "cds_home", __name__, @@ -58,4 +60,5 @@ def init_menu(app): "invenio_deposit_ui.index", _("Upload"), order=2, + visible_when=lambda: has_upload_permission() ) diff --git a/cds/modules/invenio_deposit/utils.py b/cds/modules/invenio_deposit/utils.py index 71b22e05d..1db2d374d 100644 --- a/cds/modules/invenio_deposit/utils.py +++ b/cds/modules/invenio_deposit/utils.py @@ -28,6 +28,7 @@ from flask import request from invenio_oauth2server import require_api_auth, require_oauth_scopes +from cds.modules.ldap.decorators import require_upload_permission from .scopes import write_scope @@ -84,6 +85,7 @@ def check_oauth2_scope(can_method, *myscopes): def check(record, *args, **kwargs): @require_api_auth() + @require_upload_permission() @require_oauth_scopes(*myscopes) def can(self): return can_method(record) diff --git a/cds/modules/invenio_deposit/views/ui.py b/cds/modules/invenio_deposit/views/ui.py index 2d04b33d7..062202326 100644 --- a/cds/modules/invenio_deposit/views/ui.py +++ b/cds/modules/invenio_deposit/views/ui.py @@ -27,6 +27,7 @@ from copy import deepcopy +from cds.modules.ldap.decorators import require_upload_permission from flask import Blueprint, current_app, render_template, request from flask_login import login_required from invenio_pidstore.errors import PIDDeletedError @@ -73,12 +74,14 @@ def tombstone_errorhandler(error): @blueprint.route("/deposit") @login_required + @require_upload_permission() def index(): """List user deposits.""" return render_template(current_app.config["DEPOSIT_UI_INDEX_TEMPLATE"]) @blueprint.route("/deposit/new") @login_required + @require_upload_permission() 
def new(): """Create new deposit.""" deposit_type = request.values.get("type") diff --git a/cds/modules/ldap/decorators.py b/cds/modules/ldap/decorators.py index d2056e252..28e15d556 100644 --- a/cds/modules/ldap/decorators.py +++ b/cds/modules/ldap/decorators.py @@ -33,3 +33,20 @@ def decorated_api_view(*args, **kwargs): abort(401) return func(*args, **kwargs) return decorated_api_view + + +def require_upload_permission(): + """Restrict access using the has_upload_permission check.""" + def decorator(f): + from cds.modules.records.permissions import has_upload_permission + @wraps(f) + def decorated_function(*args, **kwargs): + if not current_user.is_authenticated: + abort(401) + + if not has_upload_permission(): + abort(403) + + return f(*args, **kwargs) + return decorated_function + return decorator diff --git a/cds/modules/legacy/__init__.py b/cds/modules/legacy/__init__.py new file mode 100644 index 000000000..33eb1ff2e --- /dev/null +++ b/cds/modules/legacy/__init__.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ +"""CDS-Videos legacy migration module.""" diff --git a/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py new file mode 100644 index 000000000..e325af166 --- /dev/null +++ b/cds/modules/legacy/alembic/bf9c38b8dabd_create_legacy_branch.py @@ -0,0 +1,28 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Create legacy branch""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = 'bf9c38b8dabd' +down_revision = None +branch_labels = ("legacy",) +depends_on = '35c1075e6360' + + +def upgrade(): + """Upgrade database.""" + pass + + +def downgrade(): + """Downgrade database.""" + pass diff --git a/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py new file mode 100644 index 000000000..f0e84db46 --- /dev/null +++ b/cds/modules/legacy/alembic/f57e61d833b1_create_legacy_table.py @@ -0,0 +1,39 @@ +# +# This file is part of Invenio. +# Copyright (C) 2025 CERN. +# +# Invenio is free software; you can redistribute it and/or modify it +# under the terms of the MIT License; see LICENSE file for more details. + +"""Create legacy table.""" + +from alembic import op +import sqlalchemy as sa +import sqlalchemy_utils +from sqlalchemy.dialects import postgresql + +# revision identifiers, used by Alembic. +revision = 'f57e61d833b1' +down_revision = 'bf9c38b8dabd' +branch_labels = () +depends_on = None + + +def upgrade(): + """Upgrade database.""" + # ### commands auto generated by Alembic - please adjust! 
### + op.create_table('cds_migration_legacy_records', + sa.Column('id', sqlalchemy_utils.types.uuid.UUIDType(), nullable=False), + sa.Column('migrated_record_object_uuid', sqlalchemy_utils.types.uuid.UUIDType(), nullable=True, comment='The uuid of the record metadata of the latest record metadata at the time of the migration.'), + sa.Column('legacy_recid', sa.Integer(), nullable=True, comment='The record id in the legacy system'), + sa.Column('json', sa.JSON().with_variant(postgresql.JSONB(none_as_null=True, astext_type=sa.Text()), 'postgresql'), nullable=True, comment='The extracted information of the legacy record before any transformation.'), + sa.PrimaryKeyConstraint('id', name=op.f('pk_cds_migration_legacy_records')) + ) + # ### end Alembic commands ### + + +def downgrade(): + """Downgrade database.""" + # ### commands auto generated by Alembic - please adjust! ### + op.drop_table('cds_migration_legacy_records') + # ### end Alembic commands ### diff --git a/cds/modules/legacy/minters.py b/cds/modules/legacy/minters.py new file mode 100644 index 000000000..46103a66f --- /dev/null +++ b/cds/modules/legacy/minters.py @@ -0,0 +1,38 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Minters.""" + +from invenio_pidstore.models import PersistentIdentifier, PIDStatus + + +def legacy_recid_minter(legacy_recid, uuid): + """Legacy_recid minter.""" + PersistentIdentifier.create( + pid_type="lrecid", + pid_value=legacy_recid, + object_type="rec", + object_uuid=uuid, + status=PIDStatus.REGISTERED, + ) diff --git a/cds/modules/legacy/models.py b/cds/modules/legacy/models.py new file mode 100644 index 000000000..a16caa57b --- /dev/null +++ b/cds/modules/legacy/models.py @@ -0,0 +1,66 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ +"""CDS Migration models.""" + +import json +import uuid + +from invenio_db import db +from sqlalchemy import Column, Integer, String +from sqlalchemy.dialects import postgresql +from sqlalchemy_utils.types import UUIDType + + +class CDSMigrationLegacyRecord(db.Model): + """Store the extracted legacy information for a specific record.""" + + __tablename__ = "cds_migration_legacy_records" + + id = db.Column( + UUIDType, + primary_key=True, + default=uuid.uuid4, + ) + migrated_record_object_uuid = Column( + UUIDType, + nullable=True, + comment="The uuid of the migrated record metadata.", + ) + legacy_recid = Column( + Integer, nullable=True, comment="The record id in the legacy system" + ) + json = db.Column( + db.JSON().with_variant( + postgresql.JSONB(none_as_null=True), + "postgresql", + ), + default=lambda: dict(), + nullable=True, + comment="The extracted information of the legacy record before any transformation.", + ) + + def __repr__(self): + """Representation of the model.""" + return f"" diff --git a/cds/modules/legacy/redirector.py b/cds/modules/legacy/redirector.py new file mode 100644 index 000000000..a6a2c6e22 --- /dev/null +++ b/cds/modules/legacy/redirector.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Redirector functions and rules.""" + +from flask import Blueprint, current_app, abort, redirect +from sqlalchemy.orm.exc import NoResultFound + +from .resolver import get_pid_by_legacy_recid + +HTTP_MOVED_PERMANENTLY = 301 + +blueprint = Blueprint( + "cds_legacy", __name__, template_folder="templates", url_prefix="/legacy" +) + +@blueprint.route("/record/", strict_slashes=False) +def legacy_record_redirect(legacy_id): + """Redirect legacy recid.""" + try: + pid = get_pid_by_legacy_recid(legacy_id) + except NoResultFound: + abort(404) + + url_path = f"{current_app.config['SITE_URL']}/record/{pid.pid_value}" + return redirect(url_path, HTTP_MOVED_PERMANENTLY) \ No newline at end of file diff --git a/cds/modules/legacy/resolver.py b/cds/modules/legacy/resolver.py new file mode 100644 index 000000000..eb2c6a1c2 --- /dev/null +++ b/cds/modules/legacy/resolver.py @@ -0,0 +1,43 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. 
+# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. + +"""Resolver.""" + +from invenio_pidstore.models import PersistentIdentifier + + +def get_pid_by_legacy_recid(legacy_recid): + """Get record by pid value.""" + # Get the object uuid from pidstore + recid = PersistentIdentifier.query.filter_by( + pid_value=legacy_recid, object_type="rec", pid_type="lrecid" + ).one() + + # Use the object uuid to get the pid value + record_pid = PersistentIdentifier.query.filter_by( + object_uuid=recid.object_uuid, object_type="rec", pid_type="recid" + ).one() + + return record_pid + diff --git a/cds/modules/oauthclient/cern_openid.py b/cds/modules/oauthclient/cern_openid.py index ae14b7d85..56af1ab73 100644 --- a/cds/modules/oauthclient/cern_openid.py +++ b/cds/modules/oauthclient/cern_openid.py @@ -85,8 +85,10 @@ def find_remote_by_client_id(client_id): def fetch_extra_data(resource): """Return a dict with extra data retrieved from CERN OAuth.""" - person_id = resource.get("cern_person_id") - return dict(person_id=person_id, groups=resource["groups"]) + data = {"groups": resource.get("groups", [])} + if resource.get("cern_person_id"): + data["person_id"] = resource["cern_person_id"] + return data def account_roles_and_extra_data(account, resource, refresh_timedelta=None): @@ -178,10 +180,19 @@ def _account_info(remote, resp): resp, ) - email = resource["email"] - external_id = str(resource["cern_uid"]) - nice = resource["preferred_username"] - name = resource["name"] + email = resource.get("email") + if not email: + raise OAuthCERNRejectedAccountError("No email in userinfo", remote, resp) + + external_id = resource.get("cern_uid") or resource.get("sub") + if not external_id: + raise OAuthCERNRejectedAccountError("No external_id in userinfo", remote, resp) + external_id = str(external_id) + raw_username = 
resource.get("preferred_username") or email + if "@" in raw_username: + raw_username = raw_username.replace("@", "_").replace(".", "_") + nice = raw_username + name = resource.get("name") or nice return dict( user=dict(email=email.lower(), profile=dict(username=nice, full_name=name)), @@ -231,7 +242,7 @@ def account_setup(remote, token, resp): resource = get_resource(remote, resp) with db.session.begin_nested(): - external_id = resource.get("cern_uid") + external_id = resource.get("cern_uid") or resource.get("sub") # Set CERN person ID in extra_data. token.remote_account.extra_data = {"external_id": external_id} diff --git a/cds/modules/previewer/api.py b/cds/modules/previewer/api.py index d226f2b7e..8c94f3b4b 100644 --- a/cds/modules/previewer/api.py +++ b/cds/modules/previewer/api.py @@ -128,6 +128,18 @@ def vr(self): """Get video's VR flag.""" return self.record.get("vr") + @property + def chapters_uri(self): + """Get the chapters.vtt file link if available.""" + try: + return [ + f["links"]["self"] + for f in self.record["_files"] + if f.get("context_type") == "chapters" and f.get("content_type") == "vtt" + ][0] + except IndexError: + return None + class CDSPreviewDepositFile(PreviewFile): """Preview deposit files implementation.""" diff --git a/cds/modules/previewer/extensions/video.py b/cds/modules/previewer/extensions/video.py index 52ef2c31f..6bbc754e7 100644 --- a/cds/modules/previewer/extensions/video.py +++ b/cds/modules/previewer/extensions/video.py @@ -25,6 +25,7 @@ """Previews video files.""" +from cds.modules.records.utils import parse_video_chapters from flask import render_template @@ -63,7 +64,7 @@ def preview(self, file, embed_config=None): if "report_number" in record and len(record["report_number"]) else "" ) - + return render_template( self.template, file=file, diff --git a/cds/modules/previewer/templates/cds_previewer/macros/player.html b/cds/modules/previewer/templates/cds_previewer/macros/player.html index 9ec2ac671..bf45ca7fe 100644 --- 
a/cds/modules/previewer/templates/cds_previewer/macros/player.html +++ b/cds/modules/previewer/templates/cds_previewer/macros/player.html @@ -51,45 +51,72 @@ {% endif %} initialRendition: 'first' }); - // Preload - player.source = { - sources: [ - { - {% if video_source %} - src: "{{ video_source }}", - type: 'application/x-mpegURL' - {% elif obj.m3u8_uri and obj.subformats|length > 0 %} - src: '{{ obj.m3u8_uri }}', - type: 'application/x-mpegURL' - {% else %} - src: '{{ obj.uri }}', - type: 'video/mp4' - {% endif %} - }, - ], + + window.top.player = player; + + // --- helpers --- + function durationToSeconds(durationStr) { + if (!durationStr) return null; + const parts = durationStr.split(':').map(Number); // [HH, MM, SS] or [MM, SS] + if (parts.length === 3) { + return parts[0] * 3600 + parts[1] * 60 + parts[2]; + } + if (parts.length === 2) { + return parts[0] * 60 + parts[1]; + } + return null; + } + + // Preload + player.source = { + sources: [ + { + {% if video_source %} + src: "{{ video_source }}", + type: 'application/x-mpegURL' + {% elif obj.m3u8_uri and obj.subformats|length > 0 %} + src: '{{ obj.m3u8_uri }}', + type: 'application/x-mpegURL' + {% else %} + src: '{{ obj.uri }}', + type: 'video/mp4' + {% endif %} + }, + ], + textTracks: [ + { + kind: 'metadata', + src: '{{ obj.thumbnails_uri }}', + label: 'thumbnails', + default: true, + }, + + // Add chapters.vtt if available + {% if obj.chapters_uri %} + { + kind: 'chapters', + src: '{{ obj.chapters_uri }}', + label: 'Chapters', + }, + {% endif %} + + // Add subtitles {% if not embed_config.subtitlesOff %} - textTracks: [ - { - kind: 'metadata', - src: '{{ obj.thumbnails_uri }}', - label: 'thumbnails', - default: true, - }, - {% for uri, lang in obj.subtitles %} - { - kind: 'subtitles', - src: '{{ uri }}', - label: '{{ lang }}', - srclang: '{{ lang }}', - {% if embed_config.subtitles and embed_config.subtitles == lang %} - default: true, - {% endif %} - }, - {% endfor %} - ], + {% for uri, lang in 
obj.subtitles %} + { + kind: 'subtitles', + src: '{{ uri }}', + label: '{{ lang }}', + srclang: '{{ lang }}', + {% if embed_config.subtitles and embed_config.subtitles == lang %} + default: true, + {% endif %} + }, + {% endfor %} {% endif %} - poster: '{{ obj.poster_uri }}', - {% if obj.vr %} + ], + poster: '{{ obj.poster_uri }}', + {% if obj.vr %} vr: { 360: true, }, @@ -162,6 +189,14 @@ } })(player); {% endif %} + (function() { + const params = new URLSearchParams(window.location.search); + const videoDuration = durationToSeconds({{ (record.duration if record and record.duration else "") | tojson }}); + const startTime = parseInt(params.get('t'), 10); + if (!isNaN(startTime) && startTime >= 0 && startTime < videoDuration) { + player.currentTime = startTime; + } + })(); {% endif %} {%- endmacro %} diff --git a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json index d2e230a8c..c4c589fcf 100644 --- a/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/mappings/os-v2/records/videos/video/video-v1.0.0.json @@ -234,6 +234,9 @@ "recid": { "type": "double" }, + "legacy_recid": { + "type": "double" + }, "doi": { "type": "text" }, @@ -331,6 +334,133 @@ }, "publication_date": { "type": "text" + }, + "alternate_identifiers": { + "properties": { + "scheme": { + "type": "keyword" + }, + "value": { + "type": "keyword" + } + } + }, + "_curation": { + "type": "object", + "properties": { + "legacy_report_number": { + "type": "keyword" + }, + "department": { + "type": "keyword" + }, + "volumes": { + "type": "text" + }, + "physical_location": { + "type": "text" + }, + "physical_medium": { + "type": "text" + }, + "internal_note": { + "type": "text" + }, + "digitized": { + "type": "object", + "properties": { + "url": { + "type": "text" + }, + "format": { + "type": "text" + }, + "link_text": { + "type": "text" + }, + "public_note": { + 
"type": "text" + }, + "nonpublic_note": { + "type": "text" + }, + "md5_checksum": { + "type": "text" + }, + "source": { + "type": "text" + } + } + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "type": "text" + }, + "336": { + "type": "text" + }, + "583": { + "type": "text" + }, + "306": { + "type": "text" + } + } + } + } + }, + "additional_titles": { + "type": "object", + "properties": { + "title": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "additional_descriptions": { + "type": "object", + "properties": { + "description": { + "type": "text" + }, + "type": { + "type": "keyword" + }, + "lang": { + "type": "keyword" + } + } + }, + "related_identifiers": { + "type": "object", + "properties": { + "identifier": { + "type": "text" + }, + "scheme": { + "type": "keyword" + }, + "relation_type": { + "type": "keyword" + }, + "resource_type": { + "type": "keyword" + } + } + }, + "collections": { + "type": "keyword" + }, + "additional_languages": { + "type": "text" } } } diff --git a/cds/modules/records/permissions.py b/cds/modules/records/permissions.py index 295b7a0cb..9a9b37137 100644 --- a/cds/modules/records/permissions.py +++ b/cds/modules/records/permissions.py @@ -25,7 +25,7 @@ from flask import current_app from flask_security import current_user -from invenio_access import Permission +from invenio_access import Permission, action_factory from invenio_files_rest.models import Bucket, MultipartObject, ObjectVersion from invenio_records_files.api import FileObject from invenio_records_files.models import RecordsBuckets @@ -35,6 +35,8 @@ from .utils import get_user_provides, is_deposit, is_record, lowercase_value +upload_access_action = action_factory("videos-upload-access") + def files_permission_factory(obj, action=None): """Permission for files are always based on the type of bucket. 
@@ -228,7 +230,7 @@ def can(self): def create(cls, record, action, user=None): """Create a record permission.""" if action in cls.create_actions: - return cls(record, allow, user) + return cls(record, has_upload_permission, user) elif action in cls.read_actions: return cls(record, has_read_record_permission, user) elif action in cls.read_eos_path_actions: @@ -334,6 +336,9 @@ def has_update_permission(user, record): """Check if user has update access to the record.""" user_id = int(user.get_id()) if user.is_authenticated else None + if not has_upload_permission(): + return False + # Allow owners deposit_creator = record.get("_deposit", {}).get("created_by", -1) if user_id == deposit_creator: @@ -359,3 +364,8 @@ def has_admin_permission(user=None, record=None): """ # Allow administrators return Permission(action_admin_access).can() + + +def has_upload_permission(*args, **kwargs): + """Return permission to allow only cern users.""" + return Permission(upload_access_action).can() \ No newline at end of file diff --git a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json index 756fc1b09..107b09231 100644 --- a/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/project/project-v1.0.0.json @@ -139,6 +139,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -147,7 +148,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", diff --git a/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json new file mode 100644 index 000000000..e1c217180 --- /dev/null +++ b/cds/modules/records/schemas/records/videos/video/definitions-v1.0.0.json @@ -0,0 +1,57 @@ +{ + "languages": { + "enum": [ + "ar", + 
"ast", + "bg", + "ca", + "ch", + "cs", + "cy", + "da", + "de", + "el", + "en", + "en-fr", + "es", + "et", + "eu", + "fi", + "fr", + "ga", + "gd", + "gl", + "he", + "hi", + "hr", + "hu", + "it", + "lt", + "ja", + "ka", + "ko", + "kw", + "nb", + "nl", + "nn", + "no", + "pl", + "pt", + "rm", + "ro", + "ru", + "se", + "silent", + "sk", + "sl", + "sr", + "sv", + "tr", + "uk", + "ur", + "zh", + "zh_CN", + "zh_TW" + ] + } +} diff --git a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json index 5ace8d1d3..84ecfcde0 100644 --- a/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json +++ b/cds/modules/records/schemas/records/videos/video/video-v1.0.0.json @@ -62,6 +62,7 @@ "Camera Operator", "Comments by", "Co-Producer", + "ContactPerson", "Creator", "Credits", "Director", @@ -70,7 +71,9 @@ "Narrator", "Photography", "Producer", + "RelatedPerson", "Reporter", + "ResearchGroup", "Screenwriter", "Speaker", "Subtitles by", @@ -201,6 +204,24 @@ "description": "List of identifiers on external systems.", "title": "External identifiers" }, + "alternate_identifiers": { + "items": { + "properties": { + "scheme": { + "title": "Scheme of the identifier (Vocabulary)", + "type": "string", + "enum": ["URL", "DOI", "CDS"] + }, + "value": { + "title": "Value of the identifier", + "type": "string" + } + } + }, + "required": ["value", "scheme"], + "title": "List of alternate identifiers of the record", + "type": "array" + }, "subject": { "additionalProperties": false, "description": "Subject.", @@ -400,59 +421,7 @@ }, "language": { "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", 
- "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string", "description": "A language of the resource." }, @@ -538,59 +507,7 @@ "language": { "description": "A language of the resource.", "default": "en", - "enum": [ - "ar", - "ast", - "bg", - "ca", - "ch", - "cs", - "cy", - "da", - "de", - "el", - "en", - "en-fr", - "es", - "et", - "eu", - "fi", - "fr", - "ga", - "gd", - "gl", - "he", - "hi", - "hr", - "hu", - "it", - "lt", - "ja", - "ka", - "ko", - "kw", - "nb", - "nl", - "nn", - "no", - "pl", - "pt", - "rm", - "ro", - "ru", - "se", - "silent", - "sk", - "sl", - "sr", - "sv", - "tr", - "uk", - "ur", - "zh", - "zh_CN", - "zh_TW" - ], + "$ref": "definitions-v1.0.0.json#/languages", "type": "string" }, "accelerator_experiment": { @@ -631,11 +548,218 @@ "type": "number", "description": "Invenio record identifier (integer)." }, + "legacy_recid": { + "type": "number", + "description": "Legacy record identifier (integer). Kept for auditing reasons." 
+ }, "original_source": { "type": "string" }, "_project_id": { "type": "string" + }, + "_curation": { + "properties": { + "legacy_report_number": { + "title": "Legacy record report number.", + "type": "array", + "items": { + "type": "string" + } + }, + "department": { + "title": "CERN department.", + "type": "string" + }, + "volumes": { + "title": "Volume list for this record.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_location": { + "title": "Tag 852 physical location.", + "type": "array", + "items": { + "type": "string" + } + }, + "physical_medium": { + "title": "Tag 340 physical medium.", + "type": "array", + "items": { + "type": "string" + } + }, + "internal_note": { + "title": "Tag 595 internal note.", + "type": "array", + "items": { + "type": "string" + } + }, + "digitized": { + "title": "Digitized metadata.", + "type": "array", + "items": { + "type": "object", + "properties": { + "url": { + "type": "string" + }, + "format": { + "type": "string" + }, + "link_text": { + "type": "string" + }, + "public_note": { + "type": "string" + }, + "nonpublic_note": { + "type": "string" + }, + "md5_checksum": { + "type": "string" + }, + "source": { + "type": "string" + } + } + } + }, + "legacy_marc_fields": { + "type": "object", + "properties": { + "964": { + "title": "Tag 964.", + "type": "array", + "items": { + "type": "string" + } + }, + "336": { + "title": "Tag 336.", + "type": "array", + "items": { + "type": "string" + } + }, + "583": { + "title": "Tag 583.", + "type": "array", + "items": { + "type": "string" + } + }, + "306": { + "title": "Tag 306.", + "type": "array", + "items": { + "type": "string" + } + } + } + } + }, + "title": "Fields that needs curation.", + "description": "This section contains MARC21 metadata fields that could not be mapped during weblectures migration.", + "type": "object" + }, + "additional_titles": { + "description": "Additional record titles.", + "type": "array", + "items": { + "type": "object", + 
"additionalProperties": false, + "properties": { + "title": { + "description": "Additional title of the record.", + "type": "string" + }, + "type": { + "type": "string", + "enum": ["Subtitle", "Other", "TranslatedTitle", "AlternativeTitle"] + }, + "lang": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } + } + } + }, + "additional_descriptions": { + "description": "Additional descriptions for the record.", + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "properties": { + "description": { + "type": "string", + "description": "Descriptive content." + }, + "type": { + "type": "string", + "enum": [ + "Abstract", + "Methods", + "Other", + "SeriesInformation", + "TableOfContents", + "TechnicalInfo" + ] + }, + "lang": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } + } + } + }, + "related_identifiers": { + "type": "array", + "items": { + "type": "object", + "required": ["identifier", "scheme", "relation_type"], + "additionalProperties": false, + "properties": { + "identifier": { + "type": "string", + "description": "The actual identifier (e.g., URL or DOI)." + }, + "scheme": { + "type": "string", + "enum": ["URL", "DOI", "CDS", "Indico"], + "description": "The scheme describing the identifier type." + }, + "relation_type": { + "type": "string", + "enum": ["IsPartOf", "IsVariantFormOf"], + "description": "Describes the relationship with the current record." + }, + "resource_type": { + "type": "string", + "enum": ["Event", "ConferencePaper", "Report", "Book"], + "description": "Type of the related resource." 
+ } + } + } + }, + "collections": { + "items": { + "type": "string" + }, + "type": "array" + }, + "additional_languages": { + "description": "Additional languages for the record.", + "type": "array", + "items": { + "type": "string", + "$ref": "definitions-v1.0.0.json#/languages" + } } }, "title": "CDS Base Record Schema v1.0.0" diff --git a/cds/modules/records/serializers/json.py b/cds/modules/records/serializers/json.py index 9524d15f9..0a05d85de 100644 --- a/cds/modules/records/serializers/json.py +++ b/cds/modules/records/serializers/json.py @@ -31,7 +31,7 @@ has_read_record_eos_path_permission, has_read_record_permission, ) -from ..utils import HTMLTagRemover, remove_html_tags +from ..utils import HTMLTagRemover, parse_video_chapters, remove_html_tags class CDSJSONSerializer(JSONSerializer): @@ -81,6 +81,12 @@ def preprocess_record(self, pid, record, links_factory=None): except KeyError: # ignore error if keys are missing in the metadata pass + + description = metadata.get('description', '') + if description: + metadata['chapters'] = parse_video_chapters(description) + else: + metadata['chapters'] = [] return result diff --git a/cds/modules/records/serializers/schemas/common.py b/cds/modules/records/serializers/schemas/common.py index bfae055f4..e0e03634f 100644 --- a/cds/modules/records/serializers/schemas/common.py +++ b/cds/modules/records/serializers/schemas/common.py @@ -155,3 +155,63 @@ class ExternalSystemIdentifiersField(StrictKeysSchema): value = fields.Str() schema = fields.Str() + + +class AlternateIdentifiersSchema(StrictKeysSchema): + """Field alternate_identifiers.""" + + value = fields.Str(required=True) + scheme = fields.Str(required=True) + + +class LegacyMARCFieldsSchema(Schema): + tag_964 = fields.List(fields.Str(), data_key="964") + tag_336 = fields.List(fields.Str(), data_key="336") + tag_583 = fields.List(fields.Str(), data_key="583") + tag_306 = fields.List(fields.Str(), data_key="306") + + +class DigitizedMetadataSchema(Schema): + url = 
fields.Str() + format = fields.Str() + link_text = fields.Str() + public_note = fields.Str() + nonpublic_note = fields.Str() + md5_checksum = fields.Str() + source = fields.Str() + + +class CurationSchema(StrictKeysSchema): + """Curation schema.""" + + legacy_report_number = fields.List(fields.Str()) + department = fields.Str() + volumes = fields.List(fields.Str()) + physical_location = fields.List(fields.Str()) + physical_medium = fields.List(fields.Str()) + internal_note = fields.List(fields.Str()) + legacy_marc_fields = fields.Nested(LegacyMARCFieldsSchema) + digitized = fields.Nested(DigitizedMetadataSchema) + + +class AdditionalTitlesSchema(Schema): + """Additional titles schema.""" + + title = fields.Str() + type = fields.Str() + lang = fields.Str() + + +class AdditionalDescriptionsSchema(Schema): + """Additional descriptions schema.""" + + description = fields.Str() + type = fields.Str() + lang = fields.Str() + + +class RelatedIdentifiersSchema(Schema): + identifier = fields.Str(required=True) + scheme = fields.Str(required=True) + relation_type = fields.Str(required=True) + resource_type = fields.Str() \ No newline at end of file diff --git a/cds/modules/records/serializers/schemas/datacite.py b/cds/modules/records/serializers/schemas/datacite.py index 3c991b249..44776fe03 100644 --- a/cds/modules/records/serializers/schemas/datacite.py +++ b/cds/modules/records/serializers/schemas/datacite.py @@ -53,6 +53,7 @@ class DataCiteSchemaV1(Schema): """DataCite schema v1.""" creators = fields.Method("get_creators") + contributors = fields.Method("get_contributors") dates = fields.Method("get_dates") descriptions = fields.Method("get_descriptions") identifier = fields.Nested(IdentifierSchema, attribute="metadata.doi") @@ -101,23 +102,27 @@ def get_creators(self, obj): """Get creators.""" items = [] for item in obj["metadata"].get("contributors", []): - items.append( - { - "creatorName": item.get("name", ""), - } + if item.get("role", "") != "ResearchGroup": + 
items.append( + { + "creatorName": item.get("name", ""), + } ) return items - # def get_contributors(self, obj): - # """Get contributors.""" - # items = [] - # for item in obj['metadata'].get('contributors', []): - # items.append({ - # 'contributorType': item.get('role', ''), - # 'contributorName': item.get('name', ''), - # # FIXME nameIdentifier and nameIdentifierScheme, ... ? - # }) - # return items + def get_contributors(self, obj): + """Get contributors.""" + items = [] + for item in obj['metadata'].get('contributors', []): + if item.get("role", "") == "ResearchGroup": + items.append( + { + 'contributorType': item.get('role', ''), + 'contributorName': item.get('name', ''), + # FIXME nameIdentifier and nameIdentifierScheme, ... ? + } + ) + return items def get_publication_year(self, obj): """Get publication year.""" diff --git a/cds/modules/records/serializers/schemas/video.py b/cds/modules/records/serializers/schemas/video.py index c83e2e83a..66e9f763f 100644 --- a/cds/modules/records/serializers/schemas/video.py +++ b/cds/modules/records/serializers/schemas/video.py @@ -19,25 +19,31 @@ """Video JSON schema.""" from invenio_jsonschemas import current_jsonschemas -from marshmallow import Schema, fields, pre_load, post_load +from marshmallow import Schema, fields, pre_load, post_load, post_dump from ....deposit.api import Video from ..fields.datetime import DateString from .common import ( AccessSchema, + AdditionalTitlesSchema, + AdditionalDescriptionsSchema, + AlternateIdentifiersSchema, BucketSchema, ContributorSchema, + CurationSchema, DepositSchema, ExternalSystemIdentifiersField, KeywordsSchema, LicenseSchema, OaiSchema, + RelatedIdentifiersSchema, RelatedLinksSchema, StrictKeysSchema, TitleSchema, TranslationsSchema, ) from .doi import DOI +from ...utils import parse_video_chapters class _CDSSSchema(Schema): @@ -142,6 +148,7 @@ class VideoSchema(StrictKeysSchema): note = fields.Str() publication_date = fields.Str() recid = fields.Number() + legacy_recid 
=fields.Number() related_links = fields.Nested(RelatedLinksSchema, many=True) report_number = fields.List(fields.Str, many=True) schema = fields.Str(attribute="$schema", data_key="$schema") @@ -149,7 +156,18 @@ class VideoSchema(StrictKeysSchema): translations = fields.Nested(TranslationsSchema, many=True) type = fields.Str() vr = fields.Boolean() - + _curation = fields.Nested(CurationSchema) + additional_titles = fields.List(fields.Nested(AdditionalTitlesSchema)) + additional_descriptions = fields.List(fields.Nested(AdditionalDescriptionsSchema)) + alternate_identifiers = fields.Nested( + AlternateIdentifiersSchema, many=True + ) + related_identifiers = fields.Nested( + RelatedIdentifiersSchema, many=True + ) + collections = fields.List(fields.Str, many=True) + additional_languages = fields.List(fields.Str, many=True) + # Preservation fields location = fields.Str() original_source = fields.Str() @@ -160,3 +178,4 @@ def post_load(self, data, **kwargs): """Post load.""" data["$schema"] = current_jsonschemas.path_to_url(Video._schema) return data + diff --git a/cds/modules/records/static/templates/cds_records/video/detail.html b/cds/modules/records/static/templates/cds_records/video/detail.html index c253f1a43..e7fbc15de 100644 --- a/cds/modules/records/static/templates/cds_records/video/detail.html +++ b/cds/modules/records/static/templates/cds_records/video/detail.html @@ -4,14 +4,119 @@
    -
    - +
    +
    +
    +
    + +
    +
    + + +
    +
    +
    +

    + In this video +
    +

    +
    + + + + +
    +
    + +
    +
    +
      +
    • +
      + {{ convertToMinutesSeconds(line.start) }} - {{ convertToMinutesSeconds(line.end) }} +
      +
      + {{ line.text }} +
      +
    • +
    +
    +
    + + +
    +
    + + +
    +
      +
    • +
      +
      +
      + Chapter {{ chapter.timestamp }} +
      + +
      +
      +
      + +
      +
      + {{ cleanHtmlFromTitle(chapter.title) }} +
      +
      + {{ chapter.timestamp }} +
      +
      +
      +
    • +
    +
    + +
    +
    +
    @@ -64,7 +169,7 @@

    - +
    {{translation.language | isoToLanguage}}

    {{translation.title.title}}

    @@ -130,7 +235,7 @@

    {{translation.title.title}}

    -

    +

    @@ -181,6 +286,7 @@

    {{translation.title.title}}

    +
    @@ -199,9 +305,81 @@

    {{translation.title.title}}

    + + +
    +
    +
    +

    Chapters

    + +
    + + +
    +
    +
    + +
    + Chapter {{ chapter.timestamp }} +
    + +
    + +
    + {{ chapter.timestamp }} +
    +
    + +
    +
    + {{ cleanHtmlFromTitle(chapter.title) }} +
    +
    +
    +
    +
    + +
    + +
    + +
    +
    +
    +

    Transcriptions

    + Follow along or search within the transcript. +
    + +
    + +
    +
    + + +
    @@ -212,10 +390,9 @@

    - -
    +
    diff --git a/cds/modules/records/static/templates/cds_records/video/downloads.html b/cds/modules/records/static/templates/cds_records/video/downloads.html index c5fc4888b..26ddb78e1 100644 --- a/cds/modules/records/static/templates/cds_records/video/downloads.html +++ b/cds/modules/records/static/templates/cds_records/video/downloads.html @@ -86,27 +86,27 @@

    - -
    + +
    - - + - -
    + +
    - + - -
    + +
    - + + diff --git a/cds/modules/records/static/templates/cds_records/video/related_event_section.html b/cds/modules/records/static/templates/cds_records/video/related_event_section.html new file mode 100644 index 000000000..606726d26 --- /dev/null +++ b/cds/modules/records/static/templates/cds_records/video/related_event_section.html @@ -0,0 +1,39 @@ + diff --git a/cds/modules/records/static/templates/cds_records/video/share.html b/cds/modules/records/static/templates/cds_records/video/share.html index 6efcd0688..5e0624ec6 100644 --- a/cds/modules/records/static/templates/cds_records/video/share.html +++ b/cds/modules/records/static/templates/cds_records/video/share.html @@ -1,3 +1,46 @@ + + + +

    Social media

    diff --git a/cds/modules/records/templates/cds_records/record_detail.html b/cds/modules/records/templates/cds_records/record_detail.html index e1269dfc3..2239c2c3b 100644 --- a/cds/modules/records/templates/cds_records/record_detail.html +++ b/cds/modules/records/templates/cds_records/record_detail.html @@ -93,11 +93,14 @@ {% set user_action_media_download_url = '' %} {% endif %} {# TODO: remove and replace with jinja ? #} + {% set related_query_url = config.CDS_RECORDS_RELATED_QUERY or '' %}
    diff --git a/cds/modules/records/utils.py b/cds/modules/records/utils.py index 542e590cb..2c6684512 100644 --- a/cds/modules/records/utils.py +++ b/cds/modules/records/utils.py @@ -26,10 +26,13 @@ import json +import re +from datetime import timedelta from html import unescape from urllib import parse import six +from cds.modules.records.api import CDSVideosFilesIterator from flask import current_app, g, request from flask_security import current_user from invenio_db import db @@ -482,3 +485,89 @@ def to_string(value): return value else: return json.dumps(value) + + +def get_existing_chapter_frame_timestamps(deposit): + """Get timestamps of existing chapter frames.""" + master_file = CDSVideosFilesIterator.get_master_video_file(deposit) + frames = CDSVideosFilesIterator.get_video_frames(master_file) + + existing = set() + for f in frames: + tags = f.get("tags", {}) + if tags.get("is_chapter_frame") == "true": + existing.add(float(tags.get("timestamp"))) + return existing + + +def parse_video_chapters(description): + """Parse YouTube-style chapter timestamps from video description. + + Looks for patterns like: + 00:00 Introduction + 0:30 Getting Started + 1:23:45 Advanced Topics + + Args: + description (str): Video description text + + Returns: + list: List of chapter dicts with 'timestamp', 'seconds', and 'title' keys + """ + html_tag_remover = HTMLTagRemover() + if not description: + return [] + + # Regex pattern to match timestamp formats: + # - 0:00, 00:00, 0:0, 00:0, 0:00:00, 00:00:00, etc. 
+ # - Followed by optional space/tab and chapter title + pattern = r'(?:^|\n)\s*(\d{1,2}:(?:\d{1,2}:)?\d{1,2})\s*[-\s]*(.+?)(?=\n|$)' + + chapters = [] + matches = re.findall(pattern, description, re.MULTILINE) + + for timestamp_str, title in matches: + # Parse timestamp to seconds + time_parts = timestamp_str.split(':') + if len(time_parts) == 2: # MM:SS format + minutes, seconds = map(int, time_parts) + total_seconds = minutes * 60 + seconds + elif len(time_parts) == 3: # HH:MM:SS format + hours, minutes, seconds = map(int, time_parts) + total_seconds = hours * 3600 + minutes * 60 + seconds + else: + continue + + # Clean up title + title = remove_html_tags(html_tag_remover, title).strip() + if title: + chapters.append({ + 'timestamp': timestamp_str, + 'seconds': total_seconds, + 'title': title + }) + + # Sort chapters by timestamp + chapters.sort(key=lambda x: x['seconds']) + + return chapters + + +def seconds_to_timestamp(seconds): + """Convert seconds to timestamp string (MM:SS or HH:MM:SS). 
+ + Args: + seconds (int): Number of seconds + + Returns: + str: Formatted timestamp string + """ + td = timedelta(seconds=seconds) + hours = td.seconds // 3600 + minutes = (td.seconds % 3600) // 60 + secs = td.seconds % 60 + + if hours > 0: + return f"{hours}:{minutes:02d}:{secs:02d}" + else: + return f"{minutes}:{secs:02d}" diff --git a/cds/modules/theme/assets/bootstrap3/js/cds/app.js b/cds/modules/theme/assets/bootstrap3/js/cds/app.js index ef018e020..6a8f98b7b 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds/app.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds/app.js @@ -131,4 +131,7 @@ angular.element(document).ready(function () { ["cds", "invenioSearch"], { strictDi: true } ); + document.querySelectorAll(".cds-dynamic-results").forEach((el) => { + angular.bootstrap(el, ["cds", "invenioSearch"], { strictDi: true }); + }); }); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds/module.js b/cds/modules/theme/assets/bootstrap3/js/cds/module.js index 3d6493724..b8e27af53 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds/module.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds/module.js @@ -163,6 +163,12 @@ app.filter("previewIframeSrc", [ function ($sce, $window) { return function (text, id, key, external) { var _url = "/record/" + id + "/preview/" + key; + // Pass through timestamp query parameter if present + var urlParams = new URLSearchParams($window.location.search); + var timestamp = urlParams.get("t"); + if (timestamp) { + _url += "?t=" + timestamp; + } if (external) { _url = $window.location.origin + _url; } @@ -341,6 +347,24 @@ app.filter("ellipsis", function () { }; }); +app.filter("middleEllipsis", function () { + return function (text, length) { + if (!text || text.length <= length) return text; + + const dotIndex = text.lastIndexOf("."); + const hasExtension = dotIndex > 0; + + if (hasExtension) { + const namePart = text.substring(0, dotIndex); + const extensionPart = text.substring(dotIndex); + + return namePart.substr(0, 
length) + " [...]" + extensionPart; + } + + return text.substr(0, length) + " [...]"; + }; +}); + // Trust as html app.filter("trustHtml", [ "$sce", @@ -403,7 +427,7 @@ app.filter("getFilesByType", function () { } return files.filter(function (file) { - return types.indexOf(file.context_type) !== -1; + return types.indexOf(file.media_type) !== -1; }); }; }); @@ -429,7 +453,7 @@ app.filter("getAllFilesExcept", function () { } return files.filter(function (file) { - return types.indexOf(file.context_type) == -1; + return types.indexOf(file.media_type) == -1; }); }; }); @@ -733,3 +757,17 @@ app.filter("assembleShareURL", [ }; }, ]); + + +angular.module("cds").directive("bootstrapInvenioSearch", function () { + return { + restrict: "A", + link: function (scope, element) { + try { + angular.bootstrap(element[0], ["cds", "invenioSearch"], { strictDi: true }); + } catch (e) { + if (!/already bootstrapped/.test(e.message)) throw e; + } + }, + }; +}); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsDeposit.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsDeposit.js index c7de1fa1f..632e1cc35 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsDeposit.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsDeposit.js @@ -236,7 +236,16 @@ function cdsDepositCtrl( }; }); _.forEach(that.record._cds.state, function (value, state) { - that.stateReporter[state].status = value; + if (!that.stateReporter[state]) { + // Task not in mainStatuses, add `file_video_extract_chapter_frames` + that.stateReporter[state] = { + status: value, + message: state + }; + } else { + // Update existing + that.stateReporter[state].status = value; + } }); that.calculateCurrentDepositStatus(); }; @@ -452,6 +461,10 @@ function cdsDepositCtrl( if (status && !info.status) { info.status = status; } + if (!that.stateReporter[name]) { + // New task not in mainStatuses, add 
`file_video_extract_chapter_frames` + that.stateReporter[name] = info; + } if (that.stateReporter[name].status !== info.status) { // Get metadata $scope.$broadcast("cds.deposit.task", name, info.status, info); @@ -471,8 +484,10 @@ function cdsDepositCtrl( } that.currentStartedTaskName = currentStartedTaskName; - // Change the Deposit Status - var values = _.values(that.record._cds.state); + // Change the Deposit Status, ignore `file_video_extract_chapter_frames` + var values = _.values( + _.omit(that.record._cds.state, "file_video_extract_chapter_frames") + ); if (!values.length) { that.currentDepositStatus = null; } else if (values.includes(depositStatuses.FAILURE)) { diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js index dfadbf262..ce70be33d 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/components/cdsUploader.js @@ -115,7 +115,10 @@ function cdsUploaderCtrl( if (!upload.key) { upload.key = upload.name; } - if (that.cdsDepositsCtrl.isVideoFile(upload.key)) { + if ( + !upload.isAdditional && + that.cdsDepositsCtrl.isVideoFile(upload.key) + ) { _subpromise = Upload.http(_startWorkflow(upload, response)); } else { var d = $q.defer(); @@ -278,24 +281,46 @@ function cdsUploaderCtrl( } // Remove any invalid files _files = _.difference(_files, invalidFiles || []); + + // Filter out files without a valid MIME type or with zero size + _files = _files.filter((file) => { + if (!file.type || file.type.trim() === "") { + toaster.pop( + "warning", + "Invalid File Type", + `The file ${file.name} has no valid type.` + ); + return false; // Exclude invalid files + } + + if (!file.size || file.size === 0) { + toaster.pop( + "warning", + "Empty File", + `The file ${file.name} is empty and cannot be uploaded.` + ); + return false; // Exclude zero-size 
files + } + + return true; + }); + // Make sure they have proper metadata angular.forEach(_files, function (file) { file.key = file.name; file.local = !file.receiver; + file.isAdditional = true; // Add any extra paramemters to the files if (extraHeaders) { file.headers = extraHeaders; } - }); - - // Add the files to the list - var masterFile = that.cdsDepositCtrl.findMasterFile() || {}; - var videoFiles = _.values( - that.cdsDepositsCtrl.filterOutFiles(_files).videos - ); - // Exclude video files - _files = _.difference(_files, videoFiles); + if (!extraHeaders || !("X-Invenio-File-Tags" in extraHeaders)) { + file.headers = { + "X-Invenio-File-Tags": "context_type=additional_file", + }; + } + }); // Find if any of the existing files has been replaced // (file with same filename), and if yes remove it from the existing @@ -323,6 +348,44 @@ function cdsUploaderCtrl( Array.prototype.push.apply(that.files, _files); // Add the files to the queue Array.prototype.push.apply(that.queue, _files); + + // Start upload automatically if the option is selected + if (that.autoStartUpload) { + that.upload(); + } + }; + + this.replaceMasterFile = function (_files, invalidFiles) { + // Do nothing if files array is empty + if (!_files) { + return; + } + // Remove any invalid files + _files = _.difference(_files, invalidFiles || []); + // Make sure they have proper metadata + angular.forEach(_files, function (file) { + file.key = file.name; + file.local = !file.receiver; + }); + + // Add the files to the list + var masterFile = that.cdsDepositCtrl.findMasterFile() || {}; + var videoFiles = _.values( + that.cdsDepositsCtrl.filterOutFiles(_files).videos + ); + + if ((invalidFiles || []).length > 0) { + // Push a notification + toaster.pop({ + type: "error", + title: + "Invalid file(s) for " + + (that.cdsDepositCtrl.record.title.title || "video."), + body: _.map(invalidFiles, "name").join(", "), + bodyOutputType: "trustedHtml", + }); + } + if (!that.cdsDepositCtrl.master) { // Check for 
new master file var newMasterFile = videoFiles[0]; @@ -358,11 +421,6 @@ function cdsUploaderCtrl( }); } } - - // Start upload automatically if the option is selected - if (that.autoStartUpload) { - that.upload(); - } }; // Prepare file request @@ -431,13 +489,26 @@ function cdsUploaderCtrl( function error(response) { // Inform the parents $scope.$emit("cds.deposit.error", response); - // Error uploading notification - toaster.pop({ - type: "error", - title: "Error uploading the file(s).", - body: (_.map(response, "config.data.key") || []).join(", "), - bodyOutputType: "trustedHtml", - }); + // Check if the response contains the error message + if ( + response.status === 400 && + response.data && + response.data.message + ) { + toaster.pop({ + type: "error", + title: response.data.message, + bodyOutputType: "trustedHtml", + }); + } else { + // Error uploading notification + toaster.pop({ + type: "error", + title: "Error uploading the file(s).", + body: (_.map(response, "config.data.key") || []).join(", "), + bodyOutputType: "trustedHtml", + }); + } } ) .finally(function done() { @@ -477,6 +548,15 @@ function cdsUploaderCtrl( return match.length > 1 && match[1] in isoLanguages; }; + this.validateAdditionalFiles = function (_file) { + // If it's a .vtt file, validate as subtitle + if (_file.name.toLowerCase().endsWith(".vtt")) { + return this.validateSubtitles(_file); + } + // Accept other types + return true; + }; + this.updateFile = function (key, data, force) { var index = this.findFileIndex(that.files, key); if (index != -1) { diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/filters/overallState.js b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/filters/overallState.js index 762774541..ff54133cc 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/filters/overallState.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_deposit/avc/filters/overallState.js @@ -3,7 +3,7 @@ import _ from "lodash"; function 
overallState(depositStatuses) { return function (tasks) { - var values = _.values(tasks); + var values = _.values(_.omit(tasks, "file_video_extract_chapter_frames")); if (values.length !== 0) { if (_.includes(values, "FAILURE")) { return depositStatuses.FAILURE; diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js index e71424057..31db43e50 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/app.js @@ -26,6 +26,7 @@ import "./cdsRecord"; import "angular-sanitize"; import "angular-strap"; import "invenio-files-js/dist/invenio-files-js"; +import "invenio-search-js/dist/invenio-search-js"; import "ngmodal"; import "./user_actions_logger"; @@ -42,6 +43,7 @@ angular.element(document).ready(function () { "ngclipboard", "invenioFiles.filters", "ngSanitize", + "invenioSearch", ], { strictDi: true } ); diff --git a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js index 5ded313f3..34bd81ce9 100644 --- a/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js +++ b/cds/modules/theme/assets/bootstrap3/js/cds_records/cdsRecord.js @@ -26,6 +26,7 @@ */ import angular from "angular"; +import { WebVTT } from "vtt.js"; import { getCookie } from "../getCookie"; @@ -38,7 +39,7 @@ import { getCookie } from "../getCookie"; * @description * CDS record controller. 
*/ -function cdsRecordController($scope, $sce, $http) { +function cdsRecordController($scope, $sce, $http, $timeout, $filter) { // Parameters // Assign the controller to `vm` @@ -53,11 +54,468 @@ function cdsRecordController($scope, $sce, $http) { // Record Warn - if the cdsRecord has any warning vm.cdsRecordWarning = null; + $scope.transcriptsByLanguage = {}; + $scope.transcript = []; + $scope.filteredTranscript = []; + $scope.selectedTranscriptLanguage = null; + $scope.search = { transcriptSearch: "" }; + $scope.chapters = []; + $scope.activeTab = "chapters"; // Default to chapters tab + $scope.shortDescription = ""; + $scope.fullDescription = ""; + $scope.chapterFrames = {}; + const REQUEST_HEADERS = { "Content-Type": "application/json", "X-CSRFToken": getCookie("csrftoken"), }; + $scope.scrollToElement = function (id) { + setTimeout(function () { + const el = document.getElementById(id); + if (el) { + const rect = el.getBoundingClientRect(); + const isVisible = + rect.top >= 0 && + rect.bottom <= + (window.innerHeight || document.documentElement.clientHeight); + + if (!isVisible) { + const topOffset = rect.top + window.scrollY - 60; // adjust for sticky header + window.scrollTo({ top: topOffset, behavior: "smooth" }); + } + } else { + console.warn("Element not found:", id); + } + }, 100); + }; + + $scope.seekTo = function (timecode) { + const player = window.top.player; + if (player) { + if (timecode < 0 || timecode > player.duration) { + console.warn("Invalid timecode:", timecode); + return; + } + player.currentTime = timecode; + if (player.paused) { + try { + const playPromise = player.play(); + if (playPromise && playPromise.catch) { + playPromise.catch(function (err) { + console.warn("Autoplay might be blocked by the browser:", err); + }); + } + } catch (err) { + console.warn("Error playing video:", err); + } + } + } else { + console.warn("Player not available"); + } + }; + + $scope.jumpToChapter = function (timecode) { + 
$scope.scrollToElement("videoPlayerSection"); + $scope.seekTo(timecode); + }; + + $scope.closeInThisVideoSection = function () { + $scope.showInThisVideoSection = false; + }; + + $scope.toggleInThisVideo = function (tab) { + $scope.showInThisVideoSection = true; + $scope.activeTab = tab; + + // Jump to Transcriptions section + if ($scope.showInThisVideoSection) { + $scope.scrollToElement("inThisVideoSection"); + } + }; + + $scope.parseVttFromUrl = function (url, type, lang) { + fetch(url) + .then((res) => res.text()) + .then(function (vttText) { + const parser = new WebVTT.Parser(window, WebVTT.StringDecoder()); + const cues = {}; + + parser.oncue = function (cue) { + cues[cue.text] = { + start: cue.startTime, + end: cue.endTime, + text: cue.text, + }; + }; + + parser.parse(vttText); + parser.flush(); + + $timeout(function () { + if (type === "transcript") { + $scope.transcriptsByLanguage[lang] = cues; + + // Use the first one that loads + if (!$scope.selectedTranscriptLanguage) { + $scope.transcript = cues; + $scope.filterTranscript(); + $scope.selectedTranscriptLanguage = lang; + } + } else { + console.warn("Unknown type for VTT parsing:", type); + } + }); + }) + .catch(function (err) { + console.error("VTT parsing failed", err); + }); + }; + + $scope.filterTranscript = function () { + var searchTerm = $scope.search.transcriptSearch.toLowerCase(); + $scope.filteredTranscript = Object.values($scope.transcript).filter( + function (line) { + return ( + !searchTerm || + (line.text && line.text.toLowerCase().indexOf(searchTerm) !== -1) + ); + } + ); + }; + + $scope.$watch("transcript", function (newVal) { + if (newVal) $scope.filterTranscript(); + }); + + $scope.$watch("record", function (newVal) { + if (newVal) { + $scope.initVttLoad(newVal); + $scope.prepareDescriptions(newVal.metadata.description); + $scope.buildChapterFrames(); + } + }); + + $scope.prepareDescriptions = function (description) { + if (!description) { + $scope.shortDescription = "No description"; + 
$scope.fullDescription = "No description"; + return; + } + + const lines = description.split(/\r?\n/); + const firstTen = lines + .slice(0, $scope.DESCRIPTION_PREVIEW_LINES) + .join("\n"); + + $scope.shortDescription = + $scope.processDescriptionWithClickableTimestamps(firstTen); + $scope.fullDescription = + $scope.processDescriptionWithClickableTimestamps(description); + }; + + $scope.initVttLoad = function (record) { + const files = record.metadata._files || []; + + // Subtitles (transcripts) + const transcriptVttFiles = files.filter( + (f) => f.context_type === "subtitle" && f.content_type === "vtt" + ); + + // Step 2: If found, load it + transcriptVttFiles.forEach((file) => { + const lang = file.tags.language || "unknown"; + if (file.links?.self) { + $scope.parseVttFromUrl(file.links.self, "transcript", lang); + } else { + console.warn("No subtitle file found."); + } + }); + + // Use chapters from API or parse from description as fallback + if (record.metadata.chapters && record.metadata.chapters.length > 0) { + $scope.chapters = record.metadata.chapters; + } + + // Set default active tab based on what's available (prioritize chapters) + const hasTranscripts = (record.metadata._files || []).some( + (f) => f.context_type === "subtitle" && f.content_type === "vtt" + ); + + if ($scope.chapters.length > 0) { + $scope.activeTab = "chapters"; + } else if (hasTranscripts) { + $scope.activeTab = "transcript"; + } + }; + + $scope.setTranscriptLanguage = function (lang) { + if ($scope.transcriptsByLanguage[lang]) { + $scope.transcript = $scope.transcriptsByLanguage[lang]; + $scope.filterTranscript(); + } else { + console.warn("Transcript not found for language:", lang); + } + }; + + function getScrollableParent(el) { + while (el && el !== document.body) { + const style = window.getComputedStyle(el); + const overflowY = style.overflowY; + if (overflowY === "auto" || overflowY === "scroll") { + return el; + } + el = el.parentElement; + } + return null; + } + + // Follow 
transcriptions + $scope.currentTranscriptLine = null; + function updateTranscriptHighlight() { + const player = window.top.player; + if (!player || !$scope.transcript) return; + + const currentTime = player.currentTime; + const lines = Object.values($scope.transcript); + + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (currentTime >= line.start && currentTime <= line.end) { + if ($scope.currentTranscriptLine !== line) { + $scope.currentTranscriptLine = line; + $scope.$applyAsync(); // Trigger Angular update + + // Auto-scroll to the active line + setTimeout(() => { + const el = document.querySelector(".transcript-line.active"); + const container = getScrollableParent(el); + + if (el && container) { + const elRect = el.getBoundingClientRect(); + const containerRect = container.getBoundingClientRect(); + + const currentScroll = container.scrollTop; + const topOffset = elRect.top - containerRect.top; + + const targetScroll = currentScroll + topOffset - 10; + + container.scrollTo({ + top: targetScroll, + behavior: "smooth", + }); + } + }, 50); + } + return; + } + } + + $scope.currentTranscriptLine = null; + $scope.$applyAsync(); + } + + $scope.currentChapter = null; + function updateChapterHighlight() { + const player = window.top.player; + if (!player || !$scope.chapters || $scope.chapters.length === 0) return; + + const currentTime = player.currentTime; + + for (let i = 0; i < $scope.chapters.length; i++) { + const chapter = $scope.chapters[i]; + const nextChapter = $scope.chapters[i + 1]; + + // If current time is within this chapter range + if ( + currentTime >= chapter.seconds && + (!nextChapter || currentTime < nextChapter.seconds) + ) { + if ($scope.currentChapter !== chapter) { + $scope.currentChapter = chapter; + $scope.$applyAsync(); + + // Auto-scroll to active chapter + setTimeout(() => { + const el = document.querySelector(".chapter-item.active"); + const container = getScrollableParent(el); + + if (el && container) { + const 
elRect = el.getBoundingClientRect(); + const containerRect = container.getBoundingClientRect(); + const currentScroll = container.scrollTop; + const topOffset = elRect.top - containerRect.top; + const targetScroll = currentScroll + topOffset - 10; + + container.scrollTo({ + top: targetScroll, + behavior: "smooth", + }); + } + }, 50); + } + return; + } + } + + // No chapter active + $scope.currentChapter = null; + $scope.$applyAsync(); + } + + let transcriptTimer = setInterval(updateTranscriptHighlight, 100); + let chapterTimer = setInterval(updateChapterHighlight, 100); + + $scope.$on("$destroy", function () { + clearInterval(transcriptTimer); + clearInterval(chapterTimer); + }); + + $scope.convertToMinutesSeconds = function (seconds) { + const minutes = Math.floor(seconds / 60); + const secs = Math.floor(seconds % 60); + + // Pad with zero if needed + const paddedSecs = secs < 10 ? "0" + secs : secs; + + return `${minutes}:${paddedSecs}`; + }; + + $scope.setActiveTab = function (tab) { + $scope.activeTab = tab; + }; + + $scope.processDescriptionWithClickableTimestamps = function (description) { + if (!description) return description; + + // Regex pattern to match timestamp formats: 0:00, 00:00, 0:00:00, 00:00:00 + const pattern = /(\d{1,2}:(?:\d{1,2}:)?\d{1,2})/g; + + return description.replace(pattern, function (match) { + // Parse timestamp to seconds for the seek function + const timeParts = match.split(":"); + let totalSeconds; + + if (timeParts.length === 2) { + const [minutes, seconds] = timeParts.map(Number); + totalSeconds = minutes * 60 + seconds; + } else if (timeParts.length === 3) { + const [hours, minutes, seconds] = timeParts.map(Number); + totalSeconds = hours * 3600 + minutes * 60 + seconds; + } else { + return match; // Return unchanged if invalid format + } + + // Return clickable timestamp using onclick for ng-bind-html compatibility + return `${match}`; + }); + }; + + $scope.buildChapterFrames = function () { + if (!$scope.record || 
!$scope.chapters) return; + + const master = $filter("findMaster")($scope.record); + if (!master || !master.frame) return; + + const frames = master.frame; + let matches = {}; + + // --- Exact filename match --- + $scope.chapters.forEach((chapter) => { + const expectedName = `chapter-${chapter.seconds}.jpg`; + const frame = frames.find((f) => f.key === expectedName); + if (frame) { + matches[chapter.seconds] = frame; + } + }); + // Collect chapters still missing + let missing = $scope.chapters.filter( + (c) => !matches.hasOwnProperty(c.seconds) + ); + + // If none missing, return + if (missing.length === 0) { + $scope.chapterFrames = matches; + return; + } + + // --- Fallback closest timestamp --- + missing.forEach((chapter) => { + const target = Number(chapter.seconds); + let closest = null; + let minDiff = Infinity; + + frames.forEach((frame) => { + if (!frame.tags || frame.tags.timestamp == null) return; + const ts = Number(frame.tags.timestamp); + const diff = Math.abs(ts - target); + if (diff < minDiff) { + minDiff = diff; + closest = frame; + } + }); + + matches[chapter.seconds] = closest || null; + }); + + $scope.chapterFrames = matches; + }; + + $scope.cleanHtmlFromTitle = function (title) { + if (!title) return title; + + // Remove HTML tags and clean up whitespace for display purposes only + let cleanTitle = title.replace(/<[^>]+>/g, " "); + cleanTitle = cleanTitle.replace(/\s+/g, " ").trim(); + + return cleanTitle; + }; + + $scope.share = { + link: window.location.href.split("?")[0], + startInput: "0:00", + withStart: false, + }; + + function parseHMS(txt) { + if (txt == null) return NaN; + txt = String(txt).trim(); + if (!txt) return NaN; + if (!/^\d{1,2}(?::\d{1,2}){0,2}$/.test(txt)) return NaN; + + var parts = txt.split(":").map(Number); + if (parts.length === 1) return parts[0]; // ss + if (parts.length === 2) return parts[0] * 60 + parts[1]; // mm:ss + return parts[0] * 3600 + parts[1] * 60 + parts[2]; // hh:mm:ss + } + + $scope.updateShareLink = 
function () { + var url = window.location.href.split("?")[0]; + if ($scope.share.withStart) { + var secs = parseHMS($scope.share.startInput); + if (!isNaN(secs) && secs > 0) { + url += (url.indexOf("?") === -1 ? "?" : "&") + "t=" + Math.floor(secs); + } + } + $scope.share.link = url; + }; + + $scope.copyShareLink = function () { + if (navigator.clipboard && window.isSecureContext) { + navigator.clipboard.writeText($scope.share.link); + } else { + var tmp = document.createElement("textarea"); + tmp.value = $scope.share.link; + document.body.appendChild(tmp); + tmp.select(); + try { + document.execCommand("copy"); + } catch (e) {} + document.body.removeChild(tmp); + } + }; + + /** * Trust iframe url * @memberof cdsRecordController @@ -180,7 +638,13 @@ function cdsRecordController($scope, $sce, $http) { $scope.$on("cds.record.loading.stop", cdsRecordLoadingStop); } -cdsRecordController.$inject = ["$scope", "$sce", "$http"]; +cdsRecordController.$inject = [ + "$scope", + "$sce", + "$http", + "$timeout", + "$filter", +]; //////////// @@ -212,6 +676,10 @@ function cdsRecordView($http) { function link(scope, element, attrs, vm) { scope.mediaDownloadEventUrl = attrs.mediaDownloadEventUrl; + scope.relatedQueryUrl = attrs.relatedQueryUrl; + + scope.DESCRIPTION_PREVIEW_LINES = parseInt(attrs.previewLines, 10) || 10; + // Get the record object and make it available to the scope $http.get(attrs.record).then( function (response) { @@ -265,9 +733,7 @@ cdsRecordView.$inject = ["$http"]; // Setup everything -angular - .module("cdsRecord.directives", []) - .directive("cdsRecordView", cdsRecordView); +angular.module("cdsRecord.directives", []).directive("cdsRecordView", cdsRecordView); angular .module("cdsRecord.controllers", []) diff --git a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss index a8ad576c9..f29a4cbfb 100644 --- a/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss +++ 
b/cds/modules/theme/assets/bootstrap3/scss/cds/cds.scss @@ -81,6 +81,10 @@ html, body { } } +.panel-heading-warning { + background-color: $brand-warning !important; +} + .cds-deposit-metadata-extraction-alert { line-height: 33px; span { @@ -310,6 +314,35 @@ a.cds-anchor:hover{ align-items: center; } +.cds-tags-custom { + display: flex; + flex-direction: column; + align-items: flex-start; + margin-top: 5px; + + li a { + max-width: 400px !important; + background-color: transparent !important; + border: 1px solid $cds-primary-color !important; + color: $cds-primary-color !important; + &:hover { + background-color:lighten($cds-primary-color, 10%) !important; + color: #fff !important; + } + } +} + +.custom-flex-row { + display: flex; + justify-content: space-between; + align-items: center; +} + +.cds-video-title-text { + color: $cds-primary-color !important; + min-height: 3em; +} + ///////// // /search @@ -796,6 +829,10 @@ div[cds-search-results] { } } +.custom-video-style div[cds-search-results] .cds-video-title img { + height: 9em !important; +} + .cds-div-shadow { box-shadow: 0 1px 2px rgba(0,0,0,.1); } @@ -1079,3 +1116,332 @@ div[cds-search-results] { } } } + +.transcript-line:hover, +.transcript-line.active { + background-color: $gray-lighter; +} + +.transcription-button { + border-radius: 9px !important; + border: 1px solid $cds-primary-color !important; + color: $cds-primary-color !important; +} + +.transcription-button:hover { + background-color: $cds-primary-color !important; + color: #fff !important; +} + +.transcription-close:hover { + cursor: pointer; +} + +// Video detail chapters and transcript styles +.chapters-main-horizontal { + &::-webkit-scrollbar { + height: 6px; + } + + &::-webkit-scrollbar-track { + background: #f1f1f1; + border-radius: 3px; + } + + &::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 3px; + + &:hover { + background: #a8a8a8; + } + } +} +// In this video panel styles +.in-this-video-panel { + height: 
calc((9/16)*70vw); + max-height: calc(100vh - 300px); + min-height: var(--flex854-mode-player-height); + display: flex; + flex-direction: column; + .tabs-container { + margin-bottom: 10px; + margin-top: 10px; + + .nav-tabs { + border-bottom: 1px solid #ddd; + + a { + padding: 10px 14px; + font-size: 14px; + } + } + } + .transcript-content{ + display: flex; + flex-direction: column; + overflow: hidden; + } + + .content-container { + flex: 1; + overflow-y: auto; + } + + .language-selector { + border-top: 1px solid #eee; + padding-top: 10px; + margin-bottom: 0; + + label { + font-size: 13px; + color: #666; + margin-bottom: 6px; + } + + .form-control { + font-size: 14px; + height: 30px; + + } + } + + .search-container { + margin-bottom: 10px; + + .form-control { + height: 30px; + font-size: 14px; + } + } +} + +// Common styles for both transcript and chapter items +.transcript-item, +.chapter-item { + cursor: pointer; + transition: background-color 0.2s ease; + + &:hover { + background-color: #f8f9fa; + } + + // Active and hover states + &.active { + background-color: #e3f2fd !important; + border-left: 4px solid #2196F3; + padding-left: 10px !important; + + .transcript-timestamp { + color: #1976D2 !important; + font-weight: 700 !important; + } + + .transcript-text { + color: #333 !important; + font-weight: 500 !important; + } + } + + &:hover:not(.active) { + border-left: 3px solid #e3f2fd; + padding-left: 11px !important; + } +} + +// Transcript specific styles +.transcript-item { + padding: 6px 8px; + border-radius: 3px; + + .transcript-timestamp { + font-size: 12px; + color: #2196F3; + font-weight: 600; + margin-bottom: 4px; + } + + .transcript-text { + font-size: 13px; + line-height: 1.4; + } +} + +// Chapter specific styles +.chapter-item { + padding: 12px; + border-bottom: 1px solid #f0f0f0; + + &:last-child { + border-bottom: none; + } + + .chapter-content { + display: flex; + align-items: center; + + .chapter-thumbnail { + min-width: 120px; + margin-right: 12px; 
+ + .thumbnail-container { + width: 120px; + height: 67px; + border-radius: 4px; + overflow: hidden; + background-color: #f5f5f5; + + img { + width: 100%; + height: 100%; + object-fit: cover; + } + + .thumbnail-placeholder { + width: 100%; + height: 100%; + background-color: #e9ecef; + display: flex; + align-items: center; + justify-content: center; + + .fa { + color: #6c757d; + font-size: 16px; + } + } + } + } + + .chapter-info { + flex: 1; + + .chapter-title { + font-size: 15px; + font-weight: 500; + line-height: 1.3; + margin-bottom: 4px; + } + + .chapter-timestamp { + font-size: 14px; + color: #2196F3; + font-weight: 600; + } + } + } +} + +// Main chapters section +.cds-detail-chapters { + .chapter-item-main { + &:hover { + box-shadow: 0 2px 8px rgba(0,0,0,0.1); + transform: translateY(-1px); + } + + .chapter-timestamp-main .badge { + font-family: monospace; + } + + .chapter-title-main { + font-size: 14px; + line-height: 1.4; + } + } + + .chapter-thumbnail img { + transition: transform 0.2s ease; + + &:hover { + transform: scale(1.05); + } + } +} + +// Responsive styles +@media (max-width: 768px) { + .cds-detail-chapters .chapter-item-main { + margin-bottom: 10px; + + div[style*="display: flex"] { + flex-direction: column !important; + align-items: flex-start !important; + + .chapter-thumbnail { + margin-bottom: 8px; + margin-right: 0 !important; + } + + .chapter-info .chapter-timestamp-main { + margin-bottom: 5px; + } + } + } + + .chapter-item-horizontal-main { + width: 264px !important; + } + + .chapter-thumbnail-main-horizontal { + width: 240px !important; + height: 135px !important; + } +} + +@media (max-width: 480px) { + .chapter-item-horizontal-main { + width: 224px !important; + } + + .chapter-thumbnail-main-horizontal { + width: 200px !important; + height: 113px !important; + } +} + +@media(max-width: 992px) { + .in-this-video-panel { + min-height: 400px; + } +} + + +.cds-detail-sharelink { + display: flex; + align-items: center; + flex-wrap: wrap; +} 
+.cds-detail-sharelink .input-group { + flex: 1 1 420px; + margin-right: 10px; +} +.sharelink-start { + display: flex; + align-items: center; + font-size: 14px; +} +.sharelink-start .start-checkbox { + display: flex; + align-items: center; + cursor: pointer; + font-weight: 500; +} +.sharelink-start .start-checkbox input[type="checkbox"] { + width: 18px; + height: 18px; + margin-right: 6px; + transform: scale(1.25); +} +.start-time { + width: 64px; + border: none; + color: #ccc; + background: transparent; + text-align: center; + outline: none; +} +.sharelink-start:has(input[type="checkbox"]:checked) .start-time { + border-bottom: 1px solid #888; + color: #333333; +} \ No newline at end of file diff --git a/cds/modules/theme/static/templates/cds/video/small_video_card.html b/cds/modules/theme/static/templates/cds/video/small_video_card.html new file mode 100644 index 000000000..b009c1d5f --- /dev/null +++ b/cds/modules/theme/static/templates/cds/video/small_video_card.html @@ -0,0 +1,27 @@ + diff --git a/cds/modules/theme/webpack.py b/cds/modules/theme/webpack.py index 420a08ca9..a4f232d33 100644 --- a/cds/modules/theme/webpack.py +++ b/cds/modules/theme/webpack.py @@ -98,6 +98,7 @@ "rr-ng-ckeditor": "~0.2.1", # needed because ci fails on tests otherwise. 
not imported in any bundle "semantic-ui-less": "^2.4.1", + "vtt.js": "~0.13.0", }, aliases={ "@js/cds": "js/cds", diff --git a/scripts/setup b/scripts/setup index 434b5458b..075ae5ead 100755 --- a/scripts/setup +++ b/scripts/setup @@ -41,9 +41,12 @@ cds users create test@test.ch -a --password=123456 # Create an admin user cds users create admin@test.ch -a --password=123456 cds roles create admin +cds roles create cern-user +cds roles add test@test.ch cern-user cds roles add admin@test.ch admin cds access allow deposit-admin-access role admin cds access allow superuser-access role admin +cds access allow videos-upload-access role cern-user # Create a default files location cds files location --default videos /tmp/files diff --git a/setup.cfg b/setup.cfg index 7e2567e66..783559058 100644 --- a/setup.cfg +++ b/setup.cfg @@ -176,11 +176,13 @@ invenio_base.blueprints = cds_theme = cds.modules.theme.views:blueprint cds_redirector = cds.modules.redirector.views:blueprint cern_oauth = cds.modules.oauthclient.cern_openid:cern_openid_blueprint + cds_migration = cds.modules.legacy.redirector:blueprint invenio_config.module = cds = cds.config invenio_db.alembic = cds_announcements = cds.modules.announcements:alembic invenio_flows = cds.modules.flows:alembic + legacy = cds.modules.legacy:alembic invenio_jsonschemas.schemas = deposit = cds.modules.deposit.schemas record = cds.modules.records.schemas @@ -195,6 +197,7 @@ invenio_pidstore.minters = cds_report_number = cds.modules.records.minters:report_number_minter cds_recid = cds.modules.records.minters:cds_record_minter deposit = cds.modules.invenio_deposit.minters:deposit_minter + legacy = cds.modules.legacy.minters:legacy_recid_minter invenio_search.mappings = records = cds.modules.records.mappings deposits = cds.modules.deposit.mappings @@ -222,6 +225,9 @@ invenio_oauth2server.scopes = deposit_actions = cds.modules.invenio_deposit.scopes:actions_scope invenio_access.actions = deposit_admin_access = 
cds.modules.invenio_deposit.permissions:action_admin_access + upload_access_action = cds.modules.records.permissions:upload_access_action +invenio_db.models = + cds_migration_models = cds.modules.legacy.models [bdist_wheel] universal = 1 diff --git a/tests/unit/conftest.py b/tests/unit/conftest.py index 7bf85c96b..a4a69fe43 100644 --- a/tests/unit/conftest.py +++ b/tests/unit/conftest.py @@ -79,6 +79,7 @@ from cds.modules.invenio_deposit.permissions import action_admin_access from cds.modules.records.resolver import record_resolver from cds.modules.redirector.views import api_blueprint as cds_api_blueprint +from cds.modules.records.permissions import upload_access_action @pytest.yield_fixture(scope="module", autouse=True) @@ -203,6 +204,15 @@ def users(app, db): superadmin_role = Role(name="superadmin") db.session.add(ActionRoles(action=superuser_access.value, role=superadmin_role)) datastore.add_role_to_user(superadmin, superadmin_role) + # Give upload permission to all users + cern_user_role = Role(name="cern-user") + db.session.add( + ActionRoles(action=upload_access_action.value, role=cern_user_role) + ) + datastore.add_role_to_user(admin, cern_user_role) + datastore.add_role_to_user(user1, cern_user_role) + datastore.add_role_to_user(user2, cern_user_role) + datastore.add_role_to_user(superadmin, cern_user_role) db.session.commit() id_1 = user1.id id_2 = user2.id @@ -210,6 +220,19 @@ def users(app, db): return [id_1, id_2, id_4] +@pytest.fixture() +def external_user(app, db): + """Create external user.""" + with db.session.begin_nested(): + datastore = app.extensions["security"].datastore + user = datastore.create_user( + email="external@gmail.com", password="tester", active=True + ) + db.session.commit() + id = user.id + return id + + @pytest.fixture() def u_email(db, users): """Valid user email.""" diff --git a/tests/unit/test_external_user.py b/tests/unit/test_external_user.py new file mode 100644 index 000000000..0ffbe20a8 --- /dev/null +++ 
b/tests/unit/test_external_user.py @@ -0,0 +1,455 @@ +# -*- coding: utf-8 -*- +# +# This file is part of CDS. +# Copyright (C) 2025 CERN. +# +# CDS is free software; you can redistribute it +# and/or modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation; either version 2 of the +# License, or (at your option) any later version. +# +# CDS is distributed in the hope that it will be +# useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CDS; if not, write to the +# Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, +# MA 02111-1307, USA. +# +# In applying this license, CERN does not +# waive the privileges and immunities granted to it by virtue of its status +# as an Intergovernmental Organization or submit itself to any jurisdiction. 
+ + +"""Tests for external user permissions.""" + +import json +from io import BytesIO + +from flask import url_for +from flask_principal import AnonymousIdentity, UserNeed, identity_loaded +from flask_security import current_user, login_user, logout_user +from helpers import prepare_videos_for_publish +from invenio_access import Permission +from invenio_access.models import ActionRoles +from invenio_accounts.models import Role, User + +from cds.modules.deposit.api import deposit_video_resolver +from cds.modules.records.permissions import ( + has_upload_permission, + record_permission_factory, + upload_access_action, +) + + +def test_has_upload_permission_external_user(app, external_user): + """Test that external user without cern-user role cannot upload.""" + with app.test_request_context(): + user = User.query.get(external_user) + login_user(user) + + # Test has_upload_permission function directly + permission = Permission(upload_access_action) + assert not permission.can() + + # Test has_upload_permission helper function + assert not has_upload_permission() + + +def test_has_upload_permission_cern_user(app, users): + """Test that authenticated user with cern-user role can upload.""" + with app.test_request_context(): + user = User.query.get(users[0]) + login_user(user) + + # Test has_upload_permission function directly + permission = Permission(upload_access_action) + assert permission.can() + + # Test has_upload_permission helper function + assert has_upload_permission() + + +def test_record_create_permission_external_user(app, external_user, deposit_metadata): + """Test record create permission for external user without role.""" + with app.test_request_context(): + user = User.query.get(external_user) + login_user(user) + + # Test creating a record permission + factory = record_permission_factory(record=deposit_metadata, action="create") + assert not factory.can() + + +def test_project_rest_api_external_user_can_create( + api_app, external_user, 
deposit_metadata, json_partial_project_headers +): + """Test project creation via REST API for external user without role.""" + with api_app.test_client() as client: + user = User.query.get(external_user) + login_user(user) + + # Try to create a project via REST API + resp = client.post( + url_for("invenio_deposit_rest.project_list"), + data=json.dumps(deposit_metadata), + headers=json_partial_project_headers, + ) + # Should be forbidden (403) because user doesn't have upload permission + assert resp.status_code == 403 + + +def test_video_rest_api_external_user( + api_app, external_user, video_deposit_metadata, json_partial_project_headers +): + """Test video creation via REST API for external user without role.""" + with api_app.test_client() as client: + user = User.query.get(external_user) + login_user(user) + + # Try to create a video via REST API + resp = client.post( + url_for("invenio_deposit_rest.video_list"), + data=json.dumps(video_deposit_metadata), + headers=json_partial_project_headers, + ) + # Should be forbidden (403) because user doesn't have upload permission + assert resp.status_code == 403 + + +def test_anonymous_user_has_upload_permission(app): + """Test that anonymous users cannot upload.""" + with app.test_request_context(): + # No user logged in + logout_user() + + # Test has_upload_permission function directly + permission = Permission(upload_access_action) + assert not permission.can() + + # Test has_upload_permission helper function + assert not has_upload_permission() + + +def test_external_user_role_assignment(app, db, external_user): + """Test that we can dynamically add cern-user role to external user.""" + with app.test_request_context(): + user = User.query.get(external_user) + login_user(user) + + # Initially should not have upload permission + assert not has_upload_permission() + + # Add cern-user role + datastore = app.extensions["security"].datastore + cern_user_role = Role.query.filter_by(name="cern-user").first() + if not 
cern_user_role: + cern_user_role = Role(name="cern-user") + db.session.add( + ActionRoles(action=upload_access_action.value, role=cern_user_role) + ) + datastore.add_role_to_user(user, cern_user_role) + db.session.commit() + + # Need to logout and login again for role to take effect + logout_user() + login_user(user) + + # Now should have upload permission + assert has_upload_permission() + + +def test_published_video_access_control_external_user( + api_app, location, users, external_user, api_project +): + """Test external user access to published video records.""" + + @identity_loaded.connect + def mock_identity_provides(sender, identity): + """Ensure external users have their email in identity for testing.""" + if ( + not isinstance(identity, AnonymousIdentity) + and current_user.is_authenticated + ): + # Add UserNeed with email for all authenticated users (including external users) + if ( + current_user.email + and UserNeed(current_user.email) not in identity.provides + ): + identity.provides.add(UserNeed(current_user.email)) + + (_, video_1, video_2) = api_project + cern_user = User.query.filter_by(id=users[0]).first() + user2 = User.query.filter_by(id=users[1]).first() + ext_user = User.query.filter_by(id=external_user).first() + + # Prepare videos for publishing + prepare_videos_for_publish([video_1, video_2]) + vid1 = video_1["_deposit"]["id"] + vid2 = video_2["_deposit"]["id"] + + with api_app.test_client() as client: + login_user(cern_user) + + # Create restricted video (user2 access only) + video_1_metadata = dict(video_1) + for key in ["_files"]: + video_1_metadata.pop(key, None) + video_1_metadata["_access"] = {"read": [user2.email]} + + resp = client.put( + url_for("invenio_deposit_rest.video_item", pid_value=vid1), + data=json.dumps(video_1_metadata), + headers=[ + ("Content-Type", "application/vnd.video.partial+json"), + ("Accept", "application/json"), + ], + ) + assert resp.status_code == 200 + + # Publish restricted video + url = url_for( + 
"invenio_deposit_rest.video_actions", pid_value=vid1, action="publish" + ) + assert client.post(url).status_code == 202 + rec_pid1, _ = deposit_video_resolver(vid1).fetch_published() + + # Create restricted video (external user access only) + video_2_metadata = dict(video_2) + for key in ["_files"]: + video_2_metadata.pop(key, None) + video_2_metadata["_access"] = {"read": [ext_user.email]} + + resp = client.put( + url_for("invenio_deposit_rest.video_item", pid_value=vid2), + data=json.dumps(video_2_metadata), + headers=[ + ("Content-Type", "application/vnd.video.partial+json"), + ("Accept", "application/json"), + ], + ) + assert resp.status_code == 200 + + # Publish restricted video (external user access only) + url = url_for( + "invenio_deposit_rest.video_actions", pid_value=vid2, action="publish" + ) + assert client.post(url).status_code == 202 + rec_pid2, _ = deposit_video_resolver(vid2).fetch_published() + + # Test external user access + logout_user() + login_user(ext_user) + + # External user should be blocked from video1 + resp1 = client.get( + url_for("invenio_records_rest.recid_item", pid_value=rec_pid1.pid_value) + ) + assert resp1.status_code in [403, 404] + + # External user should access video2 + resp2 = client.get( + url_for("invenio_records_rest.recid_item", pid_value=rec_pid2.pid_value) + ) + assert resp2.status_code == 200 + video_data = json.loads(resp2.data.decode("utf-8")) + assert "metadata" in video_data + + +def test_external_user_deposit_operations( + api_app, + location, + external_user, + users, + deposit_metadata, + project_deposit_metadata, + video_deposit_metadata, + json_partial_project_headers, + json_partial_video_headers, +): + """Tests for external user deposit operations and permissions.""" + with api_app.test_request_context(): + # Setup: Create project and video as CERN user + cern_user = User.query.get(users[0]) + login_user(cern_user) + + with api_app.test_client() as client: + # Create project + resp = client.post( + 
url_for("invenio_deposit_rest.project_list"), + data=json.dumps(project_deposit_metadata), + headers=json_partial_project_headers, + ) + assert resp.status_code == 201 + project_data = json.loads(resp.data.decode("utf-8")) + project_id = project_data["metadata"]["_deposit"]["id"] + + # Create video + video_deposit_metadata["_project_id"] = project_id + resp = client.post( + url_for("invenio_deposit_rest.video_list"), + data=json.dumps(video_deposit_metadata), + headers=json_partial_video_headers, + ) + assert resp.status_code == 201 + video_data = json.loads(resp.data.decode("utf-8")) + video_id = video_data["metadata"]["_deposit"]["id"] + + # Switch to external user for testing + logout_user() + ext_user = User.query.get(external_user) + login_user(ext_user) + + # Test 1: Project creation - should be forbidden + resp = client.post( + url_for("invenio_deposit_rest.project_list"), + data=json.dumps(deposit_metadata), + headers=json_partial_project_headers, + ) + assert resp.status_code == 403 + + # Test 2: Project item operations - should be forbidden + # GET project + resp = client.get( + url_for("invenio_deposit_rest.project_item", pid_value=project_id) + ) + assert resp.status_code in [403, 404] + + # PUT project + resp = client.put( + url_for("invenio_deposit_rest.project_item", pid_value=project_id), + data=json.dumps(deposit_metadata), + headers=json_partial_project_headers, + ) + assert resp.status_code == 403 + + # DELETE project + resp = client.delete( + url_for("invenio_deposit_rest.project_item", pid_value=project_id) + ) + assert resp.status_code == 403 + + # Test 3: Project actions - should be forbidden + actions = ["publish", "edit", "discard"] + for action in actions: + resp = client.post( + url_for( + "invenio_deposit_rest.project_actions", + pid_value=project_id, + action=action, + ) + ) + assert resp.status_code in [403, 404] + + # Test 4: File operations - should be forbidden + # GET files list + resp = client.get( + 
url_for("invenio_deposit_rest.project_files", pid_value=project_id) + ) + assert resp.status_code in [403, 404] + + # POST file upload + resp = client.post( + url_for("invenio_deposit_rest.project_files", pid_value=project_id), + data={"file": (BytesIO(b"test content"), "test.txt")}, + ) + assert resp.status_code in [403, 404] + + # Test 5: Flows API - should be forbidden + flow_payload = { + "bucket_id": "test-bucket-id", + "deposit_id": video_id, + "key": "test-file.mp4", + "version_id": "test-version-id", + } + resp = client.post( + "/api/flows/", + data=json.dumps(flow_payload), + headers=json_partial_project_headers, + ) + assert resp.status_code in [401, 403, 404] + + +def test_external_user_update_access_without_upload_permission( + api_app, location, users, external_user, api_project +): + """Test that external user in _access.update still can't edit without upload permission.""" + + @identity_loaded.connect + def mock_identity_provides(sender, identity): + """Ensure external users have their email in identity for testing.""" + if ( + not isinstance(identity, AnonymousIdentity) + and current_user.is_authenticated + ): + if ( + current_user.email + and UserNeed(current_user.email) not in identity.provides + ): + identity.provides.add(UserNeed(current_user.email)) + + (_, video_1, _) = api_project + cern_user = User.query.get(users[0]) + ext_user = User.query.get(external_user) + + # Prepare videos for publishing + prepare_videos_for_publish([video_1]) + vid1 = video_1["_deposit"]["id"] + + with api_app.test_client() as client: + login_user(cern_user) + + # Create video with external user in update access + video_1_metadata = dict(video_1) + for key in ["_files"]: + video_1_metadata.pop(key, None) + video_1_metadata["_access"]["update"] = [ext_user.email] + + resp = client.put( + url_for("invenio_deposit_rest.video_item", pid_value=vid1), + data=json.dumps(video_1_metadata), + headers=[ + ("Content-Type", "application/vnd.video.partial+json"), + ("Accept", 
"application/json"), + ], + ) + # Publish video + url = url_for( + "invenio_deposit_rest.video_actions", pid_value=vid1, action="publish" + ) + assert client.post(url).status_code == 202 + rec_pid1, _ = deposit_video_resolver(vid1).fetch_published() + + # Test external user (has update access but can't edit) + logout_user() + login_user(ext_user) + + # External user should be able to read the record + resp = client.get( + url_for("invenio_records_rest.recid_item", pid_value=rec_pid1.pid_value) + ) + assert resp.status_code == 200 + video_data = json.loads(resp.data.decode("utf-8")) + + project_id = video_data["metadata"]["_project_id"] + deposit_id = video_data["metadata"]["_deposit"]["id"] + + # External user should not be able to get the deposit + resp = client.get( + url_for("invenio_deposit_rest.project_item", pid_value=project_id) + ) + assert resp.status_code in [403, 404] + + # External user should not be able to get the video deposit + res = client.get( + url_for("invenio_deposit_rest.video_item", pid_value=deposit_id) + ) + assert res.status_code in [403, 404] + + # External user should not be able to edit the video + url = url_for( + "invenio_deposit_rest.video_actions", pid_value=deposit_id, action="edit" + ) + assert client.post(url).status_code in [403, 404] diff --git a/tests/unit/test_fixtures.py b/tests/unit/test_fixtures.py index 565702dbf..83d7d81cf 100644 --- a/tests/unit/test_fixtures.py +++ b/tests/unit/test_fixtures.py @@ -91,7 +91,7 @@ def test_fixture_categories(app, script_info, db, es, location): res = runner.invoke(cli_categories, [], obj=script_info) assert res.exit_code == 0 categories = RecordMetadata.query.all() - assert len(categories) == 7 + assert len(categories) == 8 for category in categories: assert "VIDEO" in category.json["types"] diff --git a/tests/unit/test_flows_tasks.py b/tests/unit/test_flows_tasks.py index 0431dda8b..c436d4b0f 100644 --- a/tests/unit/test_flows_tasks.py +++ b/tests/unit/test_flows_tasks.py @@ -27,6 +27,7 
@@ import uuid import mock +from cds.modules.flows.api import FlowService import pytest from celery import states from celery.exceptions import Retry @@ -63,6 +64,7 @@ from cds.modules.flows.tasks import ( DownloadTask, ExtractFramesTask, + ExtractChapterFramesTask, ExtractMetadataTask, TranscodeVideoTask, sync_records_with_deposit_files, @@ -575,3 +577,197 @@ def test_sync_records_with_deposits( # check that record and deposit are sync re_edited_files = edited_files + ["obj_4"] check_deposit_record_files(deposit, edited_files, record, re_edited_files) + + +def test_extract_chapter_frames_task(app, db, bucket, video, users): + """Test that chapter frames and chapters.vtt are created from description.""" + # Create a video object version + obj = ObjectVersion.create(bucket=bucket, key="video.mp4", stream=open(video, "rb")) + add_video_tags(obj) + db.session.commit() + + # Create a project and video deposit with short chapter timestamps + project_data = { + "category": "OPEN", + "type": "VIDEO", + } + project = Project.create(project_data) + + video_data = { + "_project_id": project["_deposit"]["id"], + "title": {"title": "Test Video with Chapters"}, + "description": """Test video with chapters: + 0:00 Introduction + 0:10 Chapter 1: Getting Started + 0:20 Chapter 2: Advanced Features + 0:30 Chapter 3: Examples + 0:40 Conclusion + """, + "contributors": [{"name": "Test User", "role": "Director"}], + } + video_deposit = Video.create(video_data) + deposit_id = str(video_deposit["_deposit"]["id"]) + + # Create flow metadata + payload = dict( + version_id=str(obj.version_id), + key=obj.key, + bucket_id=str(obj.bucket_id), + deposit_id=deposit_id, + ) + flow_metadata = FlowMetadata.create( + deposit_id=deposit_id, + user_id=users[0], + payload=payload, + ) + payload["flow_id"] = str(flow_metadata.id) + flow_metadata.payload = payload + flow = FlowService(flow_metadata) + db.session.commit() + + # Expected chapter timestamps in seconds (0 becomes 0.1 offset) + 
expected_timestamps = [0.1, 10, 20, 30, 40] + + # Mocks + with mock.patch("cds.modules.flows.tasks.move_file_into_local") as mock_move, \ + mock.patch("cds.modules.flows.tasks.ff_frames") as mock_ff_frames, \ + mock.patch("cds.modules.flows.tasks.file_opener_xrootd") as mock_file_opener, \ + mock.patch("os.path.exists", return_value=True), \ + mock.patch("os.path.getsize", return_value=1024), \ + mock.patch("cds.modules.flows.tasks.sync_records_with_deposit_files"), \ + mock.patch.object(ExtractChapterFramesTask, "_base_payload", {"tags": {"duration": 500}}), \ + mock.patch("cds.modules.flows.tasks.ExtractFramesTask._create_object") as mock_create_object, \ + mock.patch("cds.modules.flows.tasks.ObjectVersion.create") as mock_obj_create, \ + mock.patch("cds.modules.flows.tasks.ObjectVersionTag.create") as mock_tag_create: + + mock_move.return_value.__enter__.return_value = "/tmp/test_video.mp4" + mock_file_opener.return_value = BytesIO(b"fake_frame_data") + + fake_obj = mock.Mock() + mock_obj_create.return_value = fake_obj + + # Run task + ExtractChapterFramesTask().s(**payload.copy()).apply_async() + + # Ensure ff_frames was called once for each expected timestamp + assert mock_ff_frames.call_count == len(expected_timestamps) + call_args_list = [call[1] for call in mock_ff_frames.call_args_list] + + for i, call_args in enumerate(call_args_list): + assert call_args["start"] == expected_timestamps[i] + assert call_args["step"] == 1 + + # Ensure _create_object was called for each chapter frame + assert mock_create_object.call_count == len(expected_timestamps) + + # Verify all calls to _create_object had correct master_id + for call_args in mock_create_object.call_args_list: + kwargs = call_args.kwargs + assert kwargs["master_id"] == obj.version_id + assert kwargs["is_chapter_frame"] is True + assert kwargs["context_type"] == "frame" + assert kwargs["media_type"] == "image" + + # ---- Verify chapters.vtt creation ---- + mock_obj_create.assert_called_once() + 
vtt_call_args = mock_obj_create.call_args + assert vtt_call_args.kwargs["bucket"] == obj.bucket + assert vtt_call_args.kwargs["key"] == "chapters.vtt" + + # Tags applied to chapters.vtt + tag_keys = [c.args[1] for c in mock_tag_create.call_args_list] + assert "context_type" in tag_keys + assert "content_type" in tag_keys + assert "media_type" in tag_keys + + +def test_extract_chapter_frames_task_cleanup(app, db, bucket, video, users): + """Test that chapter frames are updated/cleaned when description changes.""" + # Create master ObjectVersion + obj = ObjectVersion.create(bucket=bucket, key="video.mp4", stream=open(video, "rb")) + add_video_tags(obj) + db.session.commit() + master_version_id = str(obj.version_id) + + # Create project + video deposit + project = Project.create({"category": "OPEN", "type": "VIDEO"}) + video_deposit = Video.create({ + "_project_id": project["_deposit"]["id"], + "title": {"title": "Video with chapters"}, + "description": """0:00 Intro + 0:10 Chapter 1 + 0:20 Chapter 2""", + }) + deposit_id = str(video_deposit["_deposit"]["id"]) + + # Flow metadata + payload = dict( + version_id=master_version_id, + key=obj.key, + bucket_id=str(obj.bucket_id), + deposit_id=deposit_id, + ) + flow_metadata = FlowMetadata.create( + deposit_id=deposit_id, user_id=users[0], payload=payload + ) + payload["flow_id"] = str(flow_metadata.id) + flow_metadata.payload = payload + FlowService(flow_metadata) + db.session.commit() + + with mock.patch("cds.modules.flows.tasks.move_file_into_local"), \ + mock.patch("cds.modules.flows.tasks.ff_frames"), \ + mock.patch("cds.modules.flows.tasks.file_opener_xrootd", return_value=BytesIO(b"fake_frame_data")), \ + mock.patch("os.path.exists", return_value=True), \ + mock.patch("os.path.getsize", return_value=1024), \ + mock.patch("cds.modules.flows.tasks.sync_records_with_deposit_files"), \ + mock.patch.object(ExtractChapterFramesTask, "_base_payload", {"tags": {"duration": 100}}), \ + 
mock.patch("cds.modules.flows.tasks.ExtractFramesTask._create_object") as mock_create_object, \ + mock.patch("cds.modules.flows.tasks.ExtractChapterFramesTask._build_chapter_vtt"), \ + mock.patch("cds.modules.flows.tasks.get_existing_chapter_frame_timestamps") as mock_existing: + + # Track created & disposed timestamps (floats) + created_timestamps = set() + disposed_timestamps = [] + + def fake_create_object(*args, **kwargs): + if "timestamp" in kwargs: + created_timestamps.add(float(kwargs["timestamp"])) + return mock.Mock() + + def fake_clean(version_id, valid_chapter_seconds=None, *args, **kwargs): + """Simulate cleaning: if 20.0 isn't in valid seconds, mark it disposed.""" + valid_floats = {float(s) for s in (valid_chapter_seconds or [])} + if 20.0 not in valid_floats: + disposed_timestamps.append(20.0) + + mock_create_object.side_effect = fake_create_object + + # First run → should create frames at 0.1, 10.0, 20.0 + mock_existing.return_value = set() + with mock.patch("cds.modules.flows.tasks.ExtractChapterFramesTask.clean", side_effect=fake_clean): + ExtractChapterFramesTask().s(**payload.copy()).apply_async() + assert created_timestamps == {0.1, 10.0, 20.0} + assert disposed_timestamps == [] # nothing disposed on first run + + video_deposit = deposit_video_resolver(deposit_id) + # Update description → now chapters at 0.1, 10.0, 30.0 + video_deposit["description"] = """0:00 Intro + 0:10 Chapter 1 + 0:30 Chapter 3""" + video_deposit.commit() + db.session.commit() + + # Reset state + created_timestamps.clear() + disposed_timestamps.clear() + + # Second run → should create 30.0, dispose 20.0 + mock_existing.return_value = {0.1, 10.0, 20.0} + with mock.patch("cds.modules.flows.tasks.ExtractChapterFramesTask.clean", side_effect=fake_clean): + ExtractChapterFramesTask().s(**payload.copy()).apply_async() + + assert 30.0 in created_timestamps + assert 20.0 in disposed_timestamps + + diff --git a/tests/unit/test_legacy_redirector.py 
b/tests/unit/test_legacy_redirector.py new file mode 100644 index 000000000..5ad47d74b --- /dev/null +++ b/tests/unit/test_legacy_redirector.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# +# Copyright (C) 2024 CERN. +# +# Invenio-RDM is free software; you can redistribute it and/or modify +# it under the terms of the MIT License; see LICENSE file for more details. + + +from io import BytesIO + +import pytest +from invenio_pidstore.models import PersistentIdentifier +from cds.modules.legacy.minters import legacy_recid_minter +from invenio_db import db + +LEGACY_RECID = "123456" +LEGACY_RECID_PID_TYPE = "lrecid" + +def test_legacy_record_redirection(app, video_published): + """Test legacy redirection mechanism.""" + + with app.test_client() as client: + # Fetch published record and its UUID + recid_pid, _ = video_published.fetch_published() + record_uuid = str(recid_pid.object_uuid) + + # Mint legacy PID + legacy_recid_minter(LEGACY_RECID, record_uuid) + db.session.commit() + + # Expected redirection target + expected_location = f"{app.config['SITE_URL']}/record/{recid_pid.pid_value}" + + # Test redirection from legacy recid + url = f"/legacy/record/{LEGACY_RECID}" + response = client.get(url, follow_redirects=False) + assert response.status_code == 301 + assert response.location == expected_location + + # Optionally follow the redirect if the final destination is also handled + response = client.get(url, follow_redirects=True) + assert response.status_code == 200 + + # Test not found for unknown recid + response = client.get("/legacy/record/654321") + assert response.status_code == 404 + diff --git a/tests/unit/test_schema_datacite.py b/tests/unit/test_schema_datacite.py index 60db92536..9b5228624 100644 --- a/tests/unit/test_schema_datacite.py +++ b/tests/unit/test_schema_datacite.py @@ -43,6 +43,7 @@ def test_video_metadata_tranform(app, video_record_metadata, recid_pid): {"creatorName": "pluto"}, {"creatorName": "zio paperino"}, ], + "contributors": [], "dates": 
[{"date": "2017-03-02", "dateType": "Issued"}], "descriptions": [ { diff --git a/tests/unit/test_video.py b/tests/unit/test_video.py index b9c6217a5..a51980faa 100644 --- a/tests/unit/test_video.py +++ b/tests/unit/test_video.py @@ -514,7 +514,6 @@ def test_video_keywords(es, api_project, keyword_1, keyword_2, users): @mock.patch("flask_login.current_user", mock_current_user) -@pytest.mark.skip(reason="TO BE CHECKED") def test_deposit_vtt_tags(api_app, db, api_project, users): """Test VTT tag generation.""" project, video_1, video_2 = api_project @@ -554,7 +553,7 @@ def test_deposit_vtt_tags(api_app, db, api_project, users): video_1 = deposit_video_resolver(video_1_depid) ObjectVersion.delete(bucket=video_1._bucket, key=obj.key) obj2 = ObjectVersion.create( - video_1._bucket, key="test_en.vtt", stream=BytesIO(b"hello") + video_1._bucket, key="new_fr.vtt", stream=BytesIO(b"hello") ) # publish again the video @@ -567,7 +566,7 @@ def test_deposit_vtt_tags(api_app, db, api_project, users): content_type="vtt", media_type="subtitle", context_type="subtitle", - language="en", + language="fr", ) # edit a re-published video @@ -717,13 +716,17 @@ def test_video_name_after_publish(api_app, db, api_project, users): def check_object_tags(obj, video, **tags): """Check tags on an ObjectVersion (i.e. 
on DB and deposit/record dump).""" assert obj.get_tags() == tags - for dump in [ - [d for d in files if d["key"] == obj.key][0] - for files in [video._get_files_dump(), video.fetch_published()[1]["_files"]] - ]: - assert dump["content_type"] == tags["content_type"] - assert dump["context_type"] == tags["context_type"] - assert dump["media_type"] == tags["media_type"] - assert dump["tags"] == { - t: tags[t] for t in tags if t not in ["context_type", "media_type"] - } + + file_sources = [ + video._get_files_dump(), + video.fetch_published()[1]["_files"] + ] + for files in file_sources: + matching_files = [d for d in files if d["key"] == obj.key] + for dump in matching_files: + assert dump["content_type"] == tags["content_type"] + assert dump["context_type"] == tags["context_type"] + assert dump["media_type"] == tags["media_type"] + assert dump["tags"] == { + t: tags[t] for t in tags if t not in ["context_type", "media_type"] + } diff --git a/tests/unit/test_video_rest.py b/tests/unit/test_video_rest.py index 0fa87973b..e052ec07c 100644 --- a/tests/unit/test_video_rest.py +++ b/tests/unit/test_video_rest.py @@ -27,6 +27,7 @@ import copy import json +from io import BytesIO from time import sleep import mock @@ -44,6 +45,7 @@ from invenio_db import db from invenio_indexer.api import RecordIndexer from invenio_search import current_search_client +from invenio_files_rest.models import ObjectVersion from cds.modules.deposit.api import deposit_project_resolver, deposit_video_resolver from cds.modules.deposit.receivers import datacite_register_after_publish @@ -588,6 +590,68 @@ def test_mint_doi_with_cli( doi, f"https://videos.cern.ch/record/{recid}" ) +def test_additional_files( + api_app, + users, + location, + json_headers, + json_partial_project_headers, + json_partial_video_headers, + deposit_metadata, + video_deposit_metadata, + project_deposit_metadata, +): + """Test video publish without DOI, then mint DOI using CLI.""" + 
api_app.config["DEPOSIT_DATACITE_MINTING_ENABLED"] = True + + with api_app.test_client() as client: + # Log in as the first user + login_user(User.query.get(users[0])) + + # Create a new project + project_dict = _create_new_project( + client, json_partial_project_headers, project_deposit_metadata + ) + + # Add a new empty video + video_dict = _add_video_info_to_project( + client, json_partial_video_headers, project_dict, video_deposit_metadata + ) + + video_depid = video_dict["metadata"]["_deposit"]["id"] + video_deposit = deposit_video_resolver(video_depid) + video_deposit_id = video_deposit["_deposit"]["id"] + bucket_id = video_deposit["_buckets"]["deposit"] + + # Upload additional file + key = "test.mp4" + headers = { + "X-Invenio-File-Tags": "context_type=additional_file" + } + resp = client.put( + url_for("invenio_files_rest.object_api", bucket_id=bucket_id, key=key), + input_stream=BytesIO(b"updated_content"), + headers=headers, + ) + assert resp.status_code == 200 + # Test it has the correct tags + tags = ObjectVersion.get(bucket_id, key).get_tags() + assert tags["context_type"] == "additional_file" + assert tags["content_type"] == "mp4" + assert tags["media_type"] == "video" + + # Upload invalid file and return 400 + key = "test" + headers = { + "X-Invenio-File-Tags": "context_type=additional_file" + } + resp = client.put( + url_for("invenio_files_rest.object_api", bucket_id=bucket_id, key=key), + input_stream=BytesIO(b"updated_content"), + headers=headers, + ) + assert resp.status_code == 400 + def _deposit_edit(client, json_headers, id): """Post action to edit deposit."""