/*
  https://github.com/antimatter15/whammy

  The MIT License (MIT)

  Copyright (c) 2015 Kevin Kwok

  Permission is hereby granted, free of charge, to any person obtaining a copy
  of this software and associated documentation files (the "Software"), to deal
  in the Software without restriction, including without limitation the rights
  to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  copies of the Software, and to permit persons to whom the Software is
  furnished to do so, subject to the following conditions:

  The above copyright notice and this permission notice shall be included in all
  copies or substantial portions of the Software.

  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  SOFTWARE.
*/

/**
 * Decodes a base64 string into a binary string (one character per byte),
 * using Buffer instead of the browser built-in.
 *
 * @param {string} str - base64 encoded input
 * @returns {string} decoded bytes as a binary string
 */
function atob (str) {
  return Buffer.from(str, 'base64').toString('binary');
}

/**
 * Builds a WebM byte stream out of a list of parsed WebP frames.
 *
 * Every frame must provide `width`, `height`, `duration` (milliseconds) and
 * `data`, a binary string whose first 4 characters are dropped before the
 * remainder is written as the payload of a SimpleBlock.
 *
 * @param {Array<{width: number, height: number, duration: number, data: string}>} frames
 * @returns {Uint8Array} the encoded file
 * @throws {Error} when the frames fail the checks in checkFrames
 */
function ToWebM (frames) {
  const info = checkFrames(frames);

  // max duration by cluster in milliseconds
  const CLUSTER_MAX_DURATION = 30000;

  // positions of the Cues element and of the first cluster in segment.data
  const CUES_INDEX = 2;
  const FIRST_CLUSTER_INDEX = 3;

  const EBML = [
    {
      id: 0x1a45dfa3, // EBML
      data: [
        { data: 1, id: 0x4286 }, // EBMLVersion
        { data: 1, id: 0x42f7 }, // EBMLReadVersion
        { data: 4, id: 0x42f2 }, // EBMLMaxIDLength
        { data: 8, id: 0x42f3 }, // EBMLMaxSizeLength
        { data: 'webm', id: 0x4282 }, // DocType
        { data: 2, id: 0x4287 }, // DocTypeVersion
        { data: 2, id: 0x4285 } // DocTypeReadVersion
      ]
    },
    {
      id: 0x18538067, // Segment
      data: [
        {
          id: 0x1549a966, // Info
          data: [
            // do things in millisecs (num of nanosecs for duration scale)
            { data: 1e6, id: 0x2ad7b1 }, // TimecodeScale
            { data: 'whammy', id: 0x4d80 }, // MuxingApp
            { data: 'whammy', id: 0x5741 }, // WritingApp
            { data: doubleToString(info.duration), id: 0x4489 } // Duration
          ]
        },
        {
          id: 0x1654ae6b, // Tracks
          data: [
            {
              id: 0xae, // TrackEntry
              data: [
                { data: 1, id: 0xd7 }, // TrackNumber
                { data: 1, id: 0x73c5 }, // TrackUID
                { data: 0, id: 0x9c }, // FlagLacing
                { data: 'und', id: 0x22b59c }, // Language
                { data: 'V_VP8', id: 0x86 }, // CodecID
                { data: 'VP8', id: 0x258688 }, // CodecName
                { data: 1, id: 0x83 }, // TrackType
                {
                  id: 0xe0, // Video
                  data: [
                    { data: info.width, id: 0xb0 }, // PixelWidth
                    { data: info.height, id: 0xba } // PixelHeight
                  ]
                }
              ]
            }
          ]
        },
        {
          id: 0x1c53bb6b, // Cues
          data: [
            // cue insertion point
          ]
        }
        // cluster insertion point
      ]
    }
  ];

  const segment = EBML[1];
  const cues = segment.data[CUES_INDEX];

  // Generate clusters (max duration); each cluster gets one cue point.
  let frameNumber = 0;
  let clusterTimecode = 0;
  while (frameNumber < frames.length) {
    cues.data.push({
      id: 0xbb, // CuePoint
      data: [
        { data: Math.round(clusterTimecode), id: 0xb3 }, // CueTime
        {
          id: 0xb7, // CueTrackPositions
          data: [
            { data: 1, id: 0xf7 }, // CueTrack
            // Fixed 8-byte width so the Cues element keeps the same size once
            // the real position is filled in below.
            { data: 0, size: 8, id: 0xf1 } // CueClusterPosition
          ]
        }
      ]
    });

    // Always take at least one frame, then keep going until the cluster is full.
    const clusterFrames = [];
    let clusterDuration = 0;
    do {
      clusterFrames.push(frames[frameNumber]);
      clusterDuration += frames[frameNumber].duration;
      frameNumber++;
    } while (frameNumber < frames.length && clusterDuration < CLUSTER_MAX_DURATION);

    // Block timecodes are relative to the start of their cluster.
    let clusterCounter = 0;
    const blocks = clusterFrames.map(function (webp) {
      const block = makeSimpleBlock({
        discardable: 0,
        frame: webp.data.slice(4),
        invisible: 0,
        keyframe: 1,
        lacing: 0,
        trackNum: 1,
        timecode: Math.round(clusterCounter)
      });
      clusterCounter += webp.duration;
      return { data: block, id: 0xa3 }; // SimpleBlock
    });

    segment.data.push({
      id: 0x1f43b675, // Cluster
      data: [{ data: Math.round(clusterTimecode), id: 0xe7 }].concat(blocks) // Timecode first
    });
    clusterTimecode += clusterDuration;
  }

  // First pass to compute cluster positions (offsets inside the segment data).
  let position = 0;
  for (let i = 0; i < segment.data.length; i++) {
    if (i >= FIRST_CLUSTER_INDEX) {
      cues.data[i - FIRST_CLUSTER_INDEX].data[1].data[1].data = position;
    }
    const encoded = generateEBML([segment.data[i]]);
    position += encoded.length;
    if (i !== CUES_INDEX) {
      // Save results to avoid having to encode everything twice. The cues stay
      // unencoded because their positions are still being filled in.
      segment.data[i] = encoded;
    }
  }

  return generateEBML(EBML);
}

/**
 * Validates the frame list and sums the frame durations.
 *
 * All frames must share the width and height of the first frame, and every
 * duration (the first one included) must be a number between 0 and 32767.
 *
 * @param {Array<{width: number, height: number, duration: number}>} frames
 * @returns {{duration: number, width: number, height: number}}
 * @throws {TypeError} when `frames` is missing or empty
 * @throws {Error} when a frame has a different size or an invalid duration
 */
function checkFrames (frames) {
  if (!frames || !frames.length) {
    throw new TypeError('At least one frame is required to build a video');
  }

  const width = frames[0].width;
  const height = frames[0].height;
  let duration = 0;

  for (let i = 0; i < frames.length; i++) {
    const frame = frames[i];
    if (frame.width !== width) throw new Error('Frame ' + (i + 1) + ' has a different width');
    if (frame.height !== height) throw new Error('Frame ' + (i + 1) + ' has a different height');
    // Written as a negated range check so NaN and undefined are rejected too.
    if (!(frame.duration >= 0 && frame.duration <= 0x7fff)) {
      throw new Error('Frame ' + (i + 1) + ' has a weird duration (must be between 0 and 32767)');
    }
    duration += frame.duration;
  }

  return { duration: duration, width: width, height: height };
}

/**
 * Encodes a positive integer as big-endian bytes using as few bytes as needed.
 * Zero yields an empty array.
 *
 * @param {number} num
 * @returns {Uint8Array}
 */
function numToBuffer (num) {
  const parts = [];
  // Arithmetic instead of bit operators: `>>` and `&` only see 32 bits.
  while (num > 0) {
    parts.push(num % 256);
    num = Math.floor(num / 256);
  }
  return new Uint8Array(parts.reverse());
}

/**
 * Encodes an integer as exactly `size` big-endian bytes, zero padded on the
 * left. Higher-order bytes that do not fit are dropped.
 *
 * @param {number} num
 * @param {number} size - number of bytes to produce
 * @returns {Uint8Array}
 */
function numToFixedBuffer (num, size) {
  const parts = new Uint8Array(size);
  for (let i = size - 1; i >= 0; i--) {
    // Arithmetic instead of `>>` so values beyond 32 bits keep their high bytes.
    parts[i] = num % 256;
    num = Math.floor(num / 256);
  }
  return parts;
}

/**
 * Converts a binary string into bytes, one byte per UTF-16 code unit
 * (code units above 0xff wrap when stored in the Uint8Array).
 *
 * @param {string} str
 * @returns {Uint8Array}
 */
function strToBuffer (str) {
  const arr = new Uint8Array(str.length);
  for (let i = 0; i < str.length; i++) {
    arr[i] = str.charCodeAt(i);
  }
  return arr;
}

/**
 * Converts a string of '0'/'1' characters into bytes, left-padding with zeros
 * up to a whole number of bytes.
 *
 * @param {string} bits
 * @returns {Uint8Array}
 */
function bitsToBuffer (bits) {
  const padded = bits.padStart(Math.ceil(bits.length / 8) * 8, '0');
  const data = [];
  for (let i = 0; i < padded.length; i += 8) {
    data.push(parseInt(padded.slice(i, i + 8), 2));
  }
  return new Uint8Array(data);
}

/**
 * Encodes the byte length of an element payload: a run of '0' bits, a '1'
 * marker bit, then the length itself zero-padded to a multiple of 7 bits.
 * Lengths of 1 and above produce the same bytes as the original string-based
 * computation; a zero length is encoded as the single byte 0x80.
 *
 * @param {number} len - payload length in bytes
 * @returns {Uint8Array}
 */
function encodeDataSize (len) {
  if (len === 0) {
    // Math.log(0) is -Infinity, which the general formula cannot handle.
    return new Uint8Array([0x80]);
  }
  const extraBytes = Math.ceil(Math.ceil(Math.log(len) / Math.log(2)) / 8);
  const valueBits = len.toString(2).padStart(extraBytes * 7 + 7, '0');
  const prefix = '0'.repeat(Math.max(extraBytes - 1, 0)) + '1';
  return bitsToBuffer(prefix + valueBits);
}

/**
 * Joins byte chunks into one Uint8Array. Chunks that are not Uint8Arrays are
 * flattened with toFlatArray first.
 *
 * @param {Array} parts
 * @returns {Uint8Array}
 */
function concatBytes (parts) {
  const chunks = [];
  let total = 0;
  for (let i = 0; i < parts.length; i++) {
    const chunk = parts[i] instanceof Uint8Array
      ? parts[i]
      : Uint8Array.from(toFlatArray([parts[i]]));
    chunks.push(chunk);
    total += chunk.length;
  }

  const out = new Uint8Array(total);
  let offset = 0;
  for (let i = 0; i < chunks.length; i++) {
    out.set(chunks[i], offset);
    offset += chunks[i].length;
  }
  return out;
}

/**
 * Serialises a list of element descriptions into bytes.
 *
 * Each entry is either an already encoded chunk (anything without an `id`
 * property, copied through as is) or `{ id, data, size? }`, where `data` is:
 *  - an array of child entries (encoded recursively),
 *  - a number (written with `size` bytes when `size` is given, otherwise with
 *    as few bytes as needed),
 *  - a binary string (one byte per character).
 *
 * @param {Array} json
 * @returns {Uint8Array}
 */
function generateEBML (json) {
  const ebml = [];
  for (let i = 0; i < json.length; i++) {
    const element = json[i];
    if (!('id' in element)) {
      // already encoded byte array
      ebml.push(element);
      continue;
    }

    let data = element.data;
    if (typeof data === 'object') data = generateEBML(data);
    if (typeof data === 'number') {
      data = ('size' in element)
        ? numToFixedBuffer(data, element.size)
        : bitsToBuffer(data.toString(2));
    }
    if (typeof data === 'string') data = strToBuffer(data);

    ebml.push(numToBuffer(element.id));
    ebml.push(encodeDataSize(data.length));
    ebml.push(data);
  }
  return concatBytes(ebml);
}

/**
 * Recursively flattens nested arrays / typed arrays into one flat array.
 * Anything whose `typeof` is 'object' is recursed into; everything else is
 * appended as is.
 *
 * @param {Array} arr
 * @param {Array} [outBuffer] - accumulator, created when omitted
 * @returns {Array} the accumulator
 */
function toFlatArray (arr, outBuffer) {
  if (outBuffer == null) {
    outBuffer = [];
  }
  for (let i = 0; i < arr.length; i++) {
    if (typeof arr[i] === 'object') {
      // an array
      toFlatArray(arr[i], outBuffer);
    } else {
      // a simple element
      outBuffer.push(arr[i]);
    }
  }
  return outBuffer;
}

/**
 * Builds the payload of a SimpleBlock as a binary string: track number byte,
 * two timecode bytes (high byte first), one flags byte, then the frame data.
 *
 * @param {{trackNum: number, timecode: number, keyframe: *, invisible: *,
 *          lacing: number, discardable: *, frame: string}} data
 * @returns {string}
 * @throws {Error} when `trackNum` is above 127
 */
function makeSimpleBlock (data) {
  if (data.trackNum > 127) {
    throw new Error('TrackNumber > 127 not supported');
  }

  let flags = 0;
  if (data.keyframe) flags |= 128;
  if (data.invisible) flags |= 8;
  if (data.lacing) flags |= (data.lacing << 1);
  if (data.discardable) flags |= 1;

  const header = String.fromCharCode(
    data.trackNum | 0x80,
    data.timecode >> 8,
    data.timecode & 0xff,
    flags
  );
  return header + data.frame;
}

// here's something else taken verbatim from weppy, awesome rite?
/**
 * Extracts the picture dimensions from a parsed WebP RIFF tree.
 *
 * Looks for the 3-byte VP8 keyframe start code (0x9d 0x01 0x2a) in the first
 * WEBP chunk and decodes the two 16-bit little-endian fields that follow it:
 * 14 bits of size plus 2 bits of scale each.
 *
 * @param {Object} riff - result of parseRIFF
 * @returns {{width: number, height: number, horizontalScale: number,
 *            verticalScale: number, data: string, riff: Object}}
 * @throws {Error} when the start code is not present in the chunk
 */
function parseWebP (riff) {
  const VP8 = riff.RIFF[0].WEBP[0];

  // A VP8 keyframe starts with the 0x9d012a header
  const frameStart = VP8.indexOf('\x9d\x01\x2a');
  if (frameStart === -1) {
    // Without the start code the bytes read below would be unrelated data.
    throw new Error('WebP frame does not contain a VP8 keyframe header (0x9d012a)');
  }

  const c = [];
  for (let i = 0; i < 4; i++) {
    c[i] = VP8.charCodeAt(frameStart + 3 + i);
  }

  // the code below is copied from the bitstream spec
  let tmp = (c[1] << 8) | c[0];
  const width = tmp & 0x3FFF;
  const horizontalScale = tmp >> 14;
  tmp = (c[3] << 8) | c[2];
  const height = tmp & 0x3FFF;
  const verticalScale = tmp >> 14;

  return { width, height, horizontalScale, verticalScale, data: VP8, riff: riff };
}

/**
 * Reads 4 characters of a binary string as an unsigned 32-bit little-endian
 * integer. Positions past the end of the string count as zero.
 *
 * @param {string} string
 * @param {number} offset
 * @returns {number}
 */
function readUint32LE (string, offset) {
  let value = 0;
  for (let i = 3; i >= 0; i--) {
    // Multiplication rather than `<<` keeps the result unsigned.
    value = value * 256 + (string.charCodeAt(offset + i) & 0xff);
  }
  return value;
}

/**
 * Parses a binary string into a map of chunk id -> list of chunk contents.
 * This function was ripped wholesale from weppy.
 *
 *  - 'RIFF' / 'LIST' chunks are parsed recursively; their 4-byte size field is
 *    read as little-endian.
 *  - 'WEBP' stores everything after the following 4-character field
 *    ("VP8 "/"VP8L"/"VP8X") and ends parsing.
 *  - any other id stores the rest of the string and ends parsing.
 *
 * @param {string} string - binary string (one character per byte)
 * @returns {Object<string, Array>} chunks grouped by id
 */
function parseRIFF (string) {
  let offset = 0;
  const chunks = {};

  while (offset < string.length) {
    const id = string.slice(offset, offset + 4);
    chunks[id] = chunks[id] || [];

    if (id === 'RIFF' || id === 'LIST') {
      const len = readUint32LE(string, offset + 4);
      const data = string.slice(offset + 8, offset + 8 + len);
      offset += 4 + 4 + len;
      chunks[id].push(parseRIFF(data));
    } else if (id === 'WEBP') {
      // Use (offset + 8) to skip past "VP8 "/"VP8L"/"VP8X" field after "WEBP"
      chunks[id].push(string.slice(offset + 8));
      offset = string.length;
    } else {
      // Unknown chunk type; push entire payload
      chunks[id].push(string.slice(offset + 4));
      offset = string.length;
    }
  }

  return chunks;
}

/**
 * Encodes a number as the 8 bytes of an IEEE-754 double, most significant
 * byte first, returned as a binary string. Only used for the "Duration"
 * element, which is stored as a double rather than an integer.
 *
 * @param {number} num
 * @returns {string} 8-character binary string
 */
function doubleToString (num) {
  // DataView fixes the byte order explicitly instead of assuming the host is
  // little endian.
  const view = new DataView(new ArrayBuffer(8));
  view.setFloat64(0, num, false);

  let out = '';
  for (let i = 0; i < 8; i++) {
    out += String.fromCharCode(view.getUint8(i));
  }
  return out;
}

/**
 * A more abstract-ish API: collects frames and compiles them into a video.
 *
 * @param {number} [speed] - frames per second; when given, every frame lasts
 *   1000 / speed milliseconds and add() must not receive a duration
 * @param {number} [quality=0.8] - quality passed to toDataURL for canvas frames
 */
function WhammyVideo (speed, quality = 0.8) {
  this.frames = [];
  this.duration = 1000 / speed;
  this.quality = quality;
}

/**
 * Appends one frame.
 *
 * Canvas and 2D-context frames are encoded to a WebP data URI right away,
 * because compile() can only decode data URIs.
 *
 * @param {string|Object} frame - a base64 `data:image/webp` URI, a canvas
 *   (anything with `toDataURL`) or a 2D context (anything with `canvas`)
 * @param {number} [duration] - frame duration in milliseconds; required when
 *   no fps was given to the constructor, forbidden otherwise
 * @throws {Error} on a duration / fps mismatch or a non-WebP data URI
 * @throws {TypeError} when the frame is of an unsupported type
 */
WhammyVideo.prototype.add = function (frame, duration) {
  if (typeof duration !== 'undefined' && this.duration) throw new Error("you can't pass a duration if the fps is set");
  if (typeof duration === 'undefined' && !this.duration) throw new Error("if you don't have the fps set, you need to have durations here.");

  let image = frame;
  if (image.canvas) { // CanvasRenderingContext2D
    image = image.canvas;
  }

  if (image.toDataURL) {
    image = image.toDataURL('image/webp', this.quality);
  } else if (typeof image !== 'string') {
    throw new TypeError('frame must be a a HTMLCanvasElement, a CanvasRenderingContext2D or a DataURI formatted string');
  }

  // Also catches canvases that produced a non-WebP data URI.
  if (typeof image !== 'string' || !(/^data:image\/webp;base64,/i).test(image)) {
    throw new Error('Input must be formatted properly as a base64 encoded DataURI of type image/webp');
  }

  this.frames.push({
    image: image,
    // `== null` rather than `||` so an explicit duration of 0 is kept
    duration: duration == null ? this.duration : duration
  });
};

/**
 * Encodes all collected frames and hands the result to `callback`.
 *
 * @param {function(Uint8Array): void} callback - receives the encoded video
 * @throws {TypeError} when a stored frame image is not a data URI string
 */
WhammyVideo.prototype.compile = function (callback) {
  const webm = new ToWebM(this.frames.map(function (frame, index) {
    if (typeof frame.image !== 'string') {
      throw new TypeError('Frame ' + (index + 1) + ' is not a base64 encoded DataURI string');
    }
    // 23 === 'data:image/webp;base64,'.length
    const webp = parseWebP(parseRIFF(atob(frame.image.slice(23))));
    webp.duration = frame.duration;
    return webp;
  }));
  callback(webm);
};

export const WebmGenerator = WhammyVideo;