Skip to content

Commit 9f1c4ad

Browse files
author
Adam Waldron
authored
feat: parse mp4 webvtt segments (#1545)
1 parent 8456cb3 commit 9f1c4ad

10 files changed

+449
-22
lines changed

package-lock.json

Lines changed: 12 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
"global": "^4.4.0",
6464
"m3u8-parser": "^7.2.0",
6565
"mpd-parser": "^1.3.1",
66-
"mux.js": "7.0.3",
66+
"mux.js": "7.1.0",
6767
"video.js": "^7 || ^8"
6868
},
6969
"peerDependencies": {

src/media-segment-request.js

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ export const REQUEST_ERRORS = {
1818
ABORTED: -102
1919
};
2020

21+
// Codec string identifying WebVTT text carried in mp4. The mp4 text helpers in
// this file compare a text track's codec against it before involving the
// transmuxer's WebVTT parser.
const WEB_VTT_CODEC = 'wvtt';
22+
2123
/**
2224
* Abort all requests
2325
*
@@ -164,6 +166,43 @@ const handleKeyResponse = (segment, objects, finishProcessingFn, triggerSegmentE
164166
return finishProcessingFn(null, segment);
165167
};
166168

169+
/**
 * Hands an mp4 init segment to the transmuxer so the parser matching the text
 * codec can be initialized. Only the WebVTT codec is handled; for any other
 * codec nothing is posted.
 *
 * @param {Object} segment init segment to process
 * @param {string} codec the codec of the text segments
 */
const initMp4Text = (segment, codec) => {
  if (codec !== WEB_VTT_CODEC) {
    return;
  }

  const {transmuxer, map} = segment;

  transmuxer.postMessage({
    action: 'initMp4WebVttParser',
    data: map.bytes
  });
};
183+
184+
/**
 * Parses an mp4 text segment with the transmuxer and calls the doneFn from
 * the segment loader.
 *
 * For WebVTT the segment bytes are sent to the transmuxer; once it answers,
 * `segment.bytes` is replaced with the returned data and doneFn receives the
 * parsed cues as `{ mp4VttCues }`.
 *
 * Any other codec cannot be parsed here. doneFn is then called right away with
 * an error so the request is always completed instead of being left pending.
 *
 * @param {Object} segment the text segment to parse
 * @param {string} codec the codec of the text segment
 * @param {Function} doneFn the doneFn passed from the segment loader
 */
const parseMp4TextSegment = (segment, codec, doneFn) => {
  if (codec !== WEB_VTT_CODEC) {
    // The caller returns right after invoking this function, so doneFn must be
    // called on every path or the segment request would never finish.
    doneFn({
      code: REQUEST_ERRORS.FAILURE,
      message: 'Unsupported mp4 text codec: ' + codec
    }, segment);
    return;
  }

  workerCallback({
    action: 'getMp4WebVttText',
    data: segment.bytes,
    transmuxer: segment.transmuxer,
    callback: ({data, mp4VttCues}) => {
      // keep the bytes handed back by the transmuxer on the segment
      segment.bytes = data;
      doneFn(null, segment, { mp4VttCues });
    }
  });
};
205+
167206
const parseInitSegment = (segment, callback) => {
168207
const type = detectContainerForBytes(segment.map.bytes);
169208

@@ -206,6 +245,10 @@ const parseInitSegment = (segment, callback) => {
206245
segment.map.timescales[track.id] = track.timescale;
207246
}
208247

248+
if (track.type === 'text') {
249+
initMp4Text(segment, track.codec);
250+
}
251+
209252
});
210253

211254
return callback(null);
@@ -468,6 +511,16 @@ const handleSegmentBytes = ({
468511
if (isLikelyFmp4MediaSegment(bytesAsUint8Array)) {
469512
segment.isFmp4 = true;
470513
const {tracks} = segment.map;
514+
const isMp4TextSegment = tracks.text && (!tracks.audio || !tracks.video);
515+
516+
if (isMp4TextSegment) {
517+
dataFn(segment, {
518+
data: bytesAsUint8Array,
519+
type: 'text'
520+
});
521+
parseMp4TextSegment(segment, tracks.text.codec, doneFn);
522+
return;
523+
}
471524

472525
const trackInfo = {
473526
isFmp4: true,

src/transmuxer-worker.js

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616

1717
import {Transmuxer} from 'mux.js/lib/mp4/transmuxer';
1818
import CaptionParser from 'mux.js/lib/mp4/caption-parser';
19+
import WebVttParser from 'mux.js/lib/mp4/webvtt-parser';
1920
import mp4probe from 'mux.js/lib/mp4/probe';
2021
import tsInspector from 'mux.js/lib/tools/ts-inspector.js';
2122
import {
@@ -207,6 +208,44 @@ class MessageHandlers {
207208
}, [segment.buffer]);
208209
}
209210

211+
/**
212+
* Initializes the WebVttParser and passes the init segment.
213+
*
214+
* @param {Uint8Array} data mp4 boxed WebVTT init segment data
215+
*/
216+
initMp4WebVttParser(data) {
217+
if (!this.webVttParser) {
218+
this.webVttParser = new WebVttParser();
219+
}
220+
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);
221+
222+
// Set the timescale for the parser.
223+
// This can be called repeatedly in order to set and re-set the timescale.
224+
this.webVttParser.init(segment);
225+
}
226+
227+
/**
228+
* Parse an mp4 encapsulated WebVTT segment and return an array of cues.
229+
*
230+
* @param {Uint8Array} data a text/webvtt segment
231+
* @return {Object[]} an array of parsed cue objects
232+
*/
233+
getMp4WebVttText(data) {
234+
if (!this.webVttParser) {
235+
// timescale might not be set yet if the parser is created before an init segment is passed.
236+
// default timescale is 90k.
237+
this.webVttParser = new WebVttParser();
238+
}
239+
const segment = new Uint8Array(data.data, data.byteOffset, data.byteLength);
240+
const parsed = this.webVttParser.parseSegment(segment);
241+
242+
this.self.postMessage({
243+
action: 'getMp4WebVttText',
244+
mp4VttCues: parsed || [],
245+
data: segment.buffer
246+
}, [segment.buffer]);
247+
}
248+
210249
probeMp4StartTime({timescales, data}) {
211250
const startTime = mp4probe.startTime(timescales, data);
212251

src/vtt-segment-loader.js

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -46,11 +46,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
4646
this.shouldSaveSegmentTimingInfo_ = false;
4747
}
4848

49-
createTransmuxer_() {
50-
// don't need to transmux any subtitles
51-
return null;
52-
}
53-
5449
/**
5550
* Indicates which time ranges are buffered
5651
*
@@ -282,6 +277,11 @@ export default class VTTSegmentLoader extends SegmentLoader {
282277
}
283278

284279
const segmentInfo = this.pendingSegment_;
280+
const isMp4WebVttSegmentWithCues = result.mp4VttCues && result.mp4VttCues.length;
281+
282+
if (isMp4WebVttSegmentWithCues) {
283+
segmentInfo.mp4VttCues = result.mp4VttCues;
284+
}
285285

286286
// although the VTT segment loader bandwidth isn't really used, it's good to
287287
// maintain functionality between segment loaders
@@ -334,11 +334,13 @@ export default class VTTSegmentLoader extends SegmentLoader {
334334
return;
335335
}
336336

337-
this.updateTimeMapping_(
338-
segmentInfo,
339-
this.syncController_.timelines[segmentInfo.timeline],
340-
this.playlist_
341-
);
337+
if (!isMp4WebVttSegmentWithCues) {
338+
this.updateTimeMapping_(
339+
segmentInfo,
340+
this.syncController_.timelines[segmentInfo.timeline],
341+
this.playlist_
342+
);
343+
}
342344

343345
if (segmentInfo.cues.length) {
344346
segmentInfo.timingInfo = {
@@ -380,14 +382,49 @@ export default class VTTSegmentLoader extends SegmentLoader {
380382
this.handleAppendsDone_();
381383
}
382384

383-
handleData_() {
384-
// noop as we shouldn't be getting video/audio data captions
385-
// that we do not support here.
385+
handleData_(simpleSegment, result) {
386+
const isVttType = simpleSegment && simpleSegment.type === 'vtt';
387+
const isTextResult = result && result.type === 'text';
388+
const isFmp4VttSegment = isVttType && isTextResult;
389+
// handle segment data for fmp4 encapsulated webvtt
390+
391+
if (isFmp4VttSegment) {
392+
super.handleData_(simpleSegment, result);
393+
}
386394
}
395+
387396
updateTimingInfoEnd_() {
388397
// noop
389398
}
390399

400+
/**
401+
* Utility function for converting mp4 webvtt cue objects into VTTCues.
402+
*
403+
* @param {Object} segmentInfo with mp4 webvtt cues for parsing into VTTCue objecs
404+
*/
405+
parseMp4VttCues_(segmentInfo) {
406+
const timestampOffset = this.sourceUpdater_.videoTimestampOffset() === null ?
407+
this.sourceUpdater_.audioTimestampOffset() :
408+
this.sourceUpdater_.videoTimestampOffset();
409+
410+
segmentInfo.mp4VttCues.forEach((cue) => {
411+
const start = cue.start + timestampOffset;
412+
const end = cue.end + timestampOffset;
413+
const vttCue = new window.VTTCue(start, end, cue.cueText);
414+
415+
if (cue.settings) {
416+
cue.settings.split(' ').forEach((cueSetting) => {
417+
const keyValString = cueSetting.split(':');
418+
const key = keyValString[0];
419+
const value = keyValString[1];
420+
421+
vttCue[key] = isNaN(value) ? value : Number(value);
422+
});
423+
}
424+
segmentInfo.cues.push(vttCue);
425+
});
426+
}
427+
391428
/**
392429
* Uses the WebVTT parser to parse the segment response
393430
*
@@ -406,6 +443,14 @@ export default class VTTSegmentLoader extends SegmentLoader {
406443
throw new NoVttJsError();
407444
}
408445

446+
segmentInfo.cues = [];
447+
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };
448+
449+
if (segmentInfo.mp4VttCues) {
450+
this.parseMp4VttCues_(segmentInfo);
451+
return;
452+
}
453+
409454
if (typeof window.TextDecoder === 'function') {
410455
decoder = new window.TextDecoder('utf8');
411456
} else {
@@ -419,9 +464,6 @@ export default class VTTSegmentLoader extends SegmentLoader {
419464
decoder
420465
);
421466

422-
segmentInfo.cues = [];
423-
segmentInfo.timestampmap = { MPEGTS: 0, LOCAL: 0 };
424-
425467
parser.oncue = segmentInfo.cues.push.bind(segmentInfo.cues);
426468
parser.ontimestampmap = (map) => {
427469
segmentInfo.timestampmap = map;

test/media-segment-request.test.js

Lines changed: 84 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ import {
2121
mp4VideoInit,
2222
muxed as muxedSegment,
2323
webmVideo,
24-
webmVideoInit
24+
webmVideoInit,
25+
mp4WebVttInit,
26+
mp4WebVtt
2527
} from 'create-test-data!segments';
2628
// needed for plugin registration
2729
import '../src/videojs-http-streaming';
@@ -1863,3 +1865,84 @@ QUnit.test('can get emsg ID3 frames from fmp4 audio segment', function(assert) {
18631865
// Simulate receiving the init segment after the media
18641866
this.standardXHRResponse(initReq, mp4AudioInit());
18651867
});
1868+
1869+
QUnit.test('can get webvtt text from an fmp4 segment', function(assert) {
  const done = assert.async();
  // cues the mocked transmuxer reports for the text segment
  const expectedCues = [
    {
      cueText: '2024-10-16T05:13:50Z\nen # 864527815',
      end: 1729055630.9,
      settings: undefined,
      start: 1729055630
    },
    {
      cueText: '2024-10-16T05:13:51Z\nen # 864527815',
      end: 1729055631.9,
      settings: undefined,
      start: 1729055631
    }
  ];
  const transmuxer = new videojs.EventTarget();
  // answers a posted action the way the transmuxer worker would
  const respond = (data) => transmuxer.trigger({type: 'message', data});

  transmuxer.postMessage = ({action, data}) => {
    if (action === 'getMp4WebVttText') {
      respond({action, data, mp4VttCues: expectedCues});
    }

    if (action === 'probeMp4Tracks') {
      respond({action, data, tracks: [{type: 'text', codec: 'wvtt'}]});
    }
  };

  const segment = {
    transmuxer,
    resolvedUri: 'mp4WebVtt.mp4',
    map: {
      resolvedUri: 'mp4WebVttInit.mp4'
    },
    isFmp4: true
  };
  const doneFn = (_e, _s, result) => {
    assert.equal(result.mp4VttCues.length, 2, 'there are 2 mp4VttCues');
    assert.deepEqual(result.mp4VttCues, expectedCues, 'mp4VttCues are expected values');
    transmuxer.off();
    done();
  };

  mediaSegmentRequest({
    xhr: this.xhr,
    xhrOptions: this.xhrOptions,
    decryptionWorker: this.mockDecrypter,
    segment,
    progressFn: this.noop,
    trackInfoFn: this.noop,
    timingInfoFn: this.noop,
    id3Fn: this.noop,
    captionsFn: this.noop,
    dataFn: this.noop,
    doneFn,
    triggerSegmentEventFn: this.noop
  });
  assert.equal(this.requests.length, 2, 'there are two requests');

  const [initReq, segmentReq] = this.requests.splice(0, 2);

  assert.equal(initReq.uri, 'mp4WebVttInit.mp4', 'the first request is for the init segment');
  assert.equal(segmentReq.uri, 'mp4WebVtt.mp4', 'the second request is for a segment');

  // respond to the init segment first, then to the media segment
  this.standardXHRResponse(initReq, mp4WebVttInit());
  this.standardXHRResponse(segmentReq, mp4WebVtt());
});

test/segments/mp4WebVtt.mp4

302 Bytes
Binary file not shown.

test/segments/mp4WebVttInit.mp4

564 Bytes
Binary file not shown.

0 commit comments

Comments
 (0)