Home Reference Source

src/utils/webvtt-parser.ts

  1. import { VTTParser } from './vttparser';
  2. import { utf8ArrayToStr } from '../demux/id3';
  3. import { toMpegTsClockFromTimescale } from './timescale-conversion';
  4. import { normalizePts } from '../remux/mp4-remuxer';
  5. import type { VTTCCs } from '../types/vtt';
  6.  
  // Global pattern matching every line-break form handled here: CRLF, LFCR, lone LF and lone CR.
  // The two-character alternatives come first so a pair is matched as a single break.
  7. const LINEBREAKS = /\r\n|\n\r|\n|\r/g;
  8.  
  // IE11 lacks String.prototype.startsWith, so the prefix test is done by hand:
  // take the slice of `inputString` that begins at `position` and is as long as
  // `searchString`, then compare the two for strict equality.
  const startsWith = (
    inputString: string,
    searchString: string,
    position: number = 0
  ): boolean => {
    const end = position + searchString.length;
    const candidate = inputString.slice(position, end);
    return candidate === searchString;
  };
  19.  
  /**
   * Converts a cue time string such as `mm:ss.ttt` or `hh:mm:ss.ttt` into
   * milliseconds.
   *
   * Fields are read by position from the end of the string: the last three
   * characters are the milliseconds, then two characters each for seconds and
   * minutes. Hours are only read when the string is longer than nine
   * characters, and are taken from everything before the first `:`.
   *
   * @param timeString - the timestamp text to convert
   * @returns the total number of milliseconds
   * @throws Error when any of the parsed fields is not a finite number
   */
  const cueString2millis = function (timeString: string) {
    const millis = parseInt(timeString.slice(-3));
    const secs = parseInt(timeString.slice(-6, -4));
    const mins = parseInt(timeString.slice(-9, -7));

    let hours = 0;
    if (timeString.length > 9) {
      hours = parseInt(timeString.substring(0, timeString.indexOf(':')));
    }

    // Reject the whole timestamp as soon as one field failed to parse.
    const fields = [millis, secs, mins, hours];
    if (fields.some((field) => !Number.isFinite(field))) {
      throw Error(`Malformed X-TIMESTAMP-MAP: Local:${timeString}`);
    }

    return millis + 1000 * secs + 60 * 1000 * mins + 60 * 60 * 1000 * hours;
  };
  44.  
  // From https://github.com/darkskyapp/string-hash
  // Walks the text from its last character to its first, folding each UTF-16
  // code unit into the accumulator, and returns the result as the decimal
  // string of an unsigned 32-bit integer.
  const hash = function (text: string) {
    let acc = 5381;
    for (let idx = text.length - 1; idx >= 0; idx--) {
      acc = (acc * 33) ^ text.charCodeAt(idx);
    }

    // `>>> 0` reinterprets the signed 32-bit XOR result as unsigned.
    return (acc >>> 0).toString();
  };
  55.  
  /**
   * Builds a cue id from the cue's start time, end time and text, so that the
   * timeline-controller can avoid showing repeated captions.
   *
   * The id is the concatenation of the hashes of the three values, in the
   * order start, end, text.
   *
   * @param startTime - cue start time
   * @param endTime - cue end time
   * @param text - cue text
   * @returns the concatenated hash string
   */
  export function generateCueId(
    startTime: number,
    endTime: number,
    text: string
  ) {
    const parts = [startTime.toString(), endTime.toString(), text];
    return parts.map((part) => hash(part)).join('');
  }
  65.  
  /**
   * Updates `vttCCs.ccOffset` and `vttCCs.presentationOffset` for the
   * discontinuity entry stored at `vttCCs[cc]`.
   *
   * Entries are linked backwards through their `prevCC` field. Each entry has
   * a `start` value and a `new` flag, which this function clears on the
   * entries it processes.
   *
   * @param vttCCs - discontinuity table that also carries the two offsets
   * @param cc - key of the current discontinuity entry
   * @param presentationTime - value stored as the presentation offset once the
   *   chain of new entries has been walked
   */
  const calculateOffset = function (vttCCs: VTTCCs, cc, presentationTime) {
    let current = vttCCs[cc];
    let previous = vttCCs[current.prevCC];

    // This is the first discontinuity, or cues have been processed since the
    // last discontinuity: both offsets become the current discontinuity start.
    const startsFresh = !previous || (!previous.new && current.new);
    if (startsFresh) {
      const discontinuityStart = current.start;
      vttCCs.presentationOffset = discontinuityStart;
      vttCCs.ccOffset = discontinuityStart;
      current.new = false;
      return;
    }

    // There have been discontinuities since cues were last parsed: walk back
    // through every still-new predecessor and accumulate the elapsed time.
    while (previous != null && previous.new) {
      vttCCs.ccOffset += current.start - previous.start;
      current.new = false;
      current = previous;
      previous = vttCCs[current.prevCC];
    }

    vttCCs.presentationOffset = presentationTime;
  };
  89.  
  /**
   * Parses a WebVTT payload and delivers the resulting cues through callbacks.
   *
   * The bytes are decoded to text, line breaks are normalised to "\n" and the
   * text is fed line by line to a `VTTParser`. An `X-TIMESTAMP-MAP=` line in
   * the header is consumed here (its LOCAL and MPEGTS values are extracted)
   * and is not forwarded to the parser. Each cue emitted by the parser has its
   * times shifted, its text trimmed, and an id generated when it has none.
   * Cues whose adjusted end time is not greater than 0 are dropped.
   *
   * @param vttByteArray - raw bytes of the WebVTT payload
   * @param initPTS - initial PTS, converted together with `timescale` by
   *   `toMpegTsClockFromTimescale`
   * @param timescale - timescale that `initPTS` is expressed in
   * @param vttCCs - discontinuity table carrying `ccOffset` and
   *   `presentationOffset`; it may be mutated while cues are processed
   * @param cc - key of the current discontinuity entry in `vttCCs`
   * @param timeOffset - multiplied by 90000 and passed to `normalizePts` as its
   *   second argument
   * @param callBack - receives the collected cues on flush when no error was recorded
   * @param errorCallBack - receives the recorded error on flush otherwise
   */
  export function parseWebVTT(
    vttByteArray: ArrayBuffer,
    initPTS: number,
    timescale: number,
    vttCCs: VTTCCs,
    cc: number,
    timeOffset: number,
    callBack: (cues: VTTCue[]) => void,
    errorCallBack: (error: Error) => void
  ) {
    const TIMESTAMP_MAP_TAG = 'X-TIMESTAMP-MAP=';
    const LOCAL_TAG = 'LOCAL:';
    const MPEGTS_TAG = 'MPEGTS:';

    const vttParser = new VTTParser();
    // Decode the bytes, replace any somewhat exotic linefeeds with "\n", then
    // split on that character.
    // Uint8Array.prototype.reduce is not implemented in IE11
    const decoded = utf8ArrayToStr(new Uint8Array(vttByteArray));
    const lines = decoded.trim().replace(LINEBREAKS, '\n').split('\n');
    const collectedCues: VTTCue[] = [];
    const initPTS90Hz = toMpegTsClockFromTimescale(initPTS, timescale);
    let cueTime = '00:00.000';
    let timestampMapMPEGTS = 0;
    let timestampMapLOCAL = 0;
    let parsingError: Error;
    let inHeader = true;

    vttParser.oncue = (cue: VTTCue) => {
      // Adjust cue timing; clamp cues to start no earlier than - and drop cues
      // that don't end after - 0 on timeline.
      const currCC = vttCCs[cc];
      let cueOffset = vttCCs.ccOffset;

      // Calculate subtitle PTS offset
      const webVttMpegTsMapOffset = (timestampMapMPEGTS - initPTS90Hz) / 90000;

      // Update offsets for new discontinuities
      if (currCC?.new) {
        // NOTE(review): timestampMapLOCAL starts at 0 and is only ever assigned
        // a number, so this check is always true as written — confirm intent.
        if (timestampMapLOCAL !== undefined) {
          // When local time is provided, offset = discontinuity start time - local time
          const discontinuityStart = currCC.start;
          vttCCs.ccOffset = discontinuityStart;
          cueOffset = discontinuityStart;
        } else {
          calculateOffset(vttCCs, cc, webVttMpegTsMapOffset);
        }
      }

      // If we have MPEGTS, offset = presentation time + discontinuity offset
      if (webVttMpegTsMapOffset) {
        cueOffset = webVttMpegTsMapOffset - vttCCs.presentationOffset;
      }

      const duration = cue.endTime - cue.startTime;
      const shiftedStart90k =
        (cue.startTime + cueOffset - timestampMapLOCAL) * 90000;
      const reference90k = timeOffset * 90000;
      const startTime = normalizePts(shiftedStart90k, reference90k) / 90000;
      cue.startTime = Math.max(startTime, 0);
      cue.endTime = Math.max(startTime + duration, 0);

      // Trim trailing webvtt block whitespaces
      const text = cue.text.trim();

      // Fix encoding of special characters
      cue.text = decodeURIComponent(encodeURIComponent(text));

      // If the cue was not assigned an id from the VTT file (line above the
      // content), create one.
      if (!cue.id) {
        cue.id = generateCueId(cue.startTime, cue.endTime, text);
      }

      // Only keep cues that still end after 0 once adjusted.
      if (cue.endTime > 0) {
        collectedCues.push(cue);
      }
    };

    // Remember the most recent parser error; it is reported on flush.
    vttParser.onparsingerror = (error: Error) => {
      parsingError = error;
    };

    // On flush, report either the recorded error or the collected cues.
    vttParser.onflush = () => {
      if (parsingError) {
        errorCallBack(parsingError);
      } else {
        callBack(collectedCues);
      }
    };

    // Go through contents line by line.
    for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) {
      const line = lines[lineIndex];

      if (inHeader) {
        // Look for X-TIMESTAMP-MAP in header.
        if (startsWith(line, TIMESTAMP_MAP_TAG)) {
          // Once found, no more are allowed anyway, so stop searching.
          inHeader = false;

          // Extract LOCAL and MPEGTS.
          const fields = line.slice(TIMESTAMP_MAP_TAG.length).split(',');
          for (let fieldIndex = 0; fieldIndex < fields.length; fieldIndex++) {
            const field = fields[fieldIndex];
            if (startsWith(field, LOCAL_TAG)) {
              cueTime = field.slice(LOCAL_TAG.length);
            } else if (startsWith(field, MPEGTS_TAG)) {
              timestampMapMPEGTS = parseInt(field.slice(MPEGTS_TAG.length));
            }
          }

          try {
            // Convert cue time to seconds
            timestampMapLOCAL = cueString2millis(cueTime) / 1000;
          } catch (error) {
            parsingError = error;
          }

          // Move on without handing the X-TIMESTAMP-MAP line to the parser.
          continue;
        }

        // A blank line ends the header.
        if (line === '') {
          inHeader = false;
        }
      }

      // Parse line by default.
      vttParser.parse(line + '\n');
    }

    vttParser.flush();
  }