Archive.worker.mjs (16214B)
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";

// The ArchiveUtils module is designed to be imported in both worker and
// main thread contexts.
/* eslint-disable mozilla/reject-import-system-module-from-non-system */
import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
import { ArchiveEncryptor } from "resource:///modules/backup/ArchiveEncryption.sys.mjs";
import { BackupError } from "resource:///modules/backup/BackupError.mjs";
import { ERRORS } from "chrome://browser/content/backup/backup-constants.mjs";

/**
 * An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
 * lifting of dealing with single-file archives for backups, to avoid doing
 * much on the main thread. This is mostly important for single-file archive
 * _creation_, as this is supposed to occur silently in the background without
 * the user noticing any degradation in performance.
 */
class ArchiveWorker {
  #worker = null;

  constructor() {
    // Connect the provider to the worker.
    this.#connectToPromiseWorker();
  }

  /**
   * Generates a boundary string that can be used to separate sections in a
   * multipart/mixed MIME message.
   *
   * See https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
   *
   * @returns {string}
   */
  #generateBoundary() {
    return (
      "----=_Part_" +
      new Date().getTime() +
      "_" +
      Math.random().toString(36).slice(2, 12) +
      "_" +
      Math.random().toString(36).slice(2, 12)
    );
  }

  /**
   * Calculates how many base64 bytes will be generated from some number of
   * unencoded bytes. This presumes that the base64 bytes include a newline
   * terminator at the end.
   *
   * @param {number} bytes
   *   The number of bytes to be converted to base64.
   * @param {boolean} encrypting
   *   True if encryption via ArchiveEncryptor is being applied. Each
   *   encrypted chunk grows by TAG_LENGTH_BYTES before encoding.
   * @returns {number}
   */
  #computeChunkBase64Bytes(bytes, encrypting) {
    if (encrypting) {
      bytes += ArchiveUtils.TAG_LENGTH_BYTES;
    }

    // 4 base64 characters per 3 input bytes (rounded up to a full quantum),
    // plus 1 for the trailing newline terminator.
    return 4 * Math.ceil(bytes / 3) + 1;
  }

  /**
   * @typedef {object} EncryptionArgs
   * @property {CryptoKey} publicKey
   *   The RSA-OAEP public key that will be used to derive keys for encrypting
   *   the backup.
   * @property {CryptoKey} backupAuthKey
   *   The AES-GCM key that will be used to authenticate the owner of the
   *   backup.
   * @property {Uint8Array} wrappedSecrets
   *   The encrypted backup secrets computed by ArchiveEncryptionState.
   * @property {Uint8Array} salt
   *   A salt computed for the PBKDF2 stretching of the recovery code.
   * @property {Uint8Array} nonce
   *   A nonce computed when wrapping the private key and OSKeyStore secret.
   */

  /**
   * Constructs a single-file archive for a backup on the filesystem. A
   * single-file archive is a specially crafted HTML document that includes,
   * among other things, an inlined multipart/mixed MIME message within a
   * document comment.
   *
   * @param {object} params
   *   Arguments that are described in more detail below.
   * @param {string} params.archivePath
   *   The path on the file system to write the single-file archive.
   * @param {string} params.markup
   *   The HTML markup to insert into the archive file before the HTML
   *   comment block. This is the markup that will be rendered if the HTML
   *   file is opened in a web browser.
   * @param {object} params.backupMetadata
   *   The metadata associated with this backup. This is a copy of the metadata
   *   object that is contained within the compressed backups' manifest.
   * @param {string} params.compressedBackupSnapshotPath
   *   The path on the file system where the compressed backup file is located.
   * @param {EncryptionArgs} [params.encryptionArgs=undefined]
   *   Optional EncryptionArgs, which will be used to encrypt this archive.
   * @param {number} params.chunkSize
   *   The size of the chunks to break the byte stream into for encoding.
   * @returns {Promise<boolean>}
   *   Resolves with true once the archive has been fully written.
   */
  async constructArchive({
    archivePath,
    markup,
    backupMetadata,
    compressedBackupSnapshotPath,
    encryptionArgs,
    chunkSize,
  }) {
    let encryptor = null;
    if (encryptionArgs) {
      encryptor = await ArchiveEncryptor.initialize(
        encryptionArgs.publicKey,
        encryptionArgs.backupAuthKey
      );
    }

    let boundary = this.#generateBoundary();

    let jsonBlock;
    if (encryptor) {
      jsonBlock = await encryptor.confirm(
        backupMetadata,
        encryptionArgs.wrappedSecrets,
        encryptionArgs.salt,
        encryptionArgs.nonce
      );
    } else {
      jsonBlock = {
        version: ArchiveUtils.SCHEMA_VERSION,
        encConfig: null,
        meta: backupMetadata,
      };
    }

    // Serialize once and reuse the same string for both the Content-Length
    // and the written body, so the advertised length always matches the
    // bytes that actually land in the file.
    let serializedJsonBlock = JSON.stringify(jsonBlock);
    let textEncoder = new TextEncoder();
    let jsonBlockLength = textEncoder.encode(serializedJsonBlock).length;

    // Once we get the ability to stream to the filesystem from IOUtils in a
    // worker, we should use that instead of appending each of these chunks.
    //
    // This isn't supposed to be some kind of generalized MIME message
    // generator, so we're happy to construct it by hand here.
    await IOUtils.writeUTF8(archivePath, markup);
    await IOUtils.writeUTF8(
      archivePath,
      `
${ArchiveUtils.INLINE_MIME_START_MARKER}
Content-Type: multipart/mixed; boundary="${boundary}"

--${boundary}
Content-Type: application/json; charset=utf-8
Content-Disposition: attachment; filename="archive.json"
Content-Length: ${jsonBlockLength}

${serializedJsonBlock}
`,
      { mode: "append" }
    );

    let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
      compressedBackupSnapshotPath
    );
    try {
      let totalBytesToRead = compressedBackupSnapshotFile.size;

      // To calculate the Content-Length of the base64 block, we start by
      // computing how many newlines we'll be adding...
      let totalNewlines = Math.ceil(totalBytesToRead / chunkSize);

      // Next, we determine how many full-sized chunks of chunkSize we'll be
      // using, and multiply that by the number of base64 bytes that such a
      // chunk will require.
      let fullSizeChunks = totalNewlines - 1;
      let fullSizeChunkBase64Bytes = this.#computeChunkBase64Bytes(
        chunkSize,
        !!encryptor
      );
      let totalBase64Bytes = fullSizeChunks * fullSizeChunkBase64Bytes;

      // Finally, if there are any leftover bytes that are less than chunkSize,
      // determine how many bytes those will require, and add it to our total.
      let leftoverChunkBytes = totalBytesToRead % chunkSize;
      if (leftoverChunkBytes) {
        totalBase64Bytes += this.#computeChunkBase64Bytes(
          leftoverChunkBytes,
          !!encryptor
        );
      } else {
        // We divided perfectly by chunkSize, so add another
        // fullSizeChunkBase64Bytes to the total.
        totalBase64Bytes += fullSizeChunkBase64Bytes;
      }

      await IOUtils.writeUTF8(
        archivePath,
        `--${boundary}
Content-Type: application/octet-stream
Content-Disposition: attachment; filename="archive.zip"
Content-Transfer-Encoding: base64
Content-Length: ${totalBase64Bytes}

`,
        { mode: "append" }
      );

      // And now we read in the bytes of the compressed file, base64 encode
      // them, and append them to the document. Down the line, this is also
      // where encryption will be done.
      let currentIndex = 0;
      while (currentIndex < totalBytesToRead) {
        let bytesToRead = Math.min(chunkSize, totalBytesToRead - currentIndex);
        if (bytesToRead <= 0) {
          throw new BackupError(
            "Failed to calculate the right number of bytes to read.",
            ERRORS.FILE_SYSTEM_ERROR
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);

        let bytesToWrite;

        if (encryptor) {
          // Flag the final chunk so that the encryptor can finalize the
          // stream. We compare against the total rather than checking for a
          // short read, because a file that divides evenly by chunkSize has
          // a full-sized last chunk - a short-read check would miss it.
          let isLastChunk = currentIndex + bytesToRead >= totalBytesToRead;
          bytesToWrite = await encryptor.encrypt(buffer, isLastChunk);
        } else {
          bytesToWrite = buffer;
        }

        // We're very intentionally newline-separating these blocks here, as
        // these blocks may have been run through encryption, and the same
        // blocks must be run through decryption to unpack the archive.
        // Newline-separation makes it easier to identify and manage these
        // blocks.
        await IOUtils.writeUTF8(
          archivePath,
          ArchiveUtils.arrayToBase64(bytesToWrite) + "\n",
          {
            mode: "append",
          }
        );

        currentIndex += bytesToRead;
      }

      await IOUtils.writeUTF8(
        archivePath,
        `
--${boundary}
${ArchiveUtils.INLINE_MIME_END_MARKER}
`,
        { mode: "append" }
      );
    } finally {
      // Make sure the file handle is released even if a write fails.
      compressedBackupSnapshotFile.close();
    }

    return true;
  }

  /**
   * @typedef {object} ArchiveHeaderResult
   * @property {string} contentType
   *   The value of the Content-Type for the inlined MIME message.
   * @property {number} startByteOffset
   *   The byte offset within the archive file where the inlined MIME message
   *   begins.
   */

  /**
   * Given a path to a single-file archive HTML file, this method will sniff
   * the header of the file to make sure it matches one that we support. If
   * successful, it will resolve with the contentType of the inline MIME
   * message, as well as the byte offset for which the start of the inlined
   * MIME message can be read from.
   *
   * @param {string} archivePath
   *   The path to a single-file archive HTML file.
   * @returns {Promise<ArchiveHeaderResult, Error>}
   */
  parseArchiveHeader(archivePath) {
    // We expect the first bytes of the file to indicate that this is an HTML5
    // file and to give us a version number we can handle.
    let syncReadFile = IOUtils.openFileForSyncReading(archivePath);

    let startByteOffset = 0;
    let contentType = null;

    try {
      let totalBytes = syncReadFile.size;

      // This seems like a reasonable minimum number of bytes to read in to
      // get at the header. If the header data isn't in there, then it's a
      // corrupt file.
      const MAX_BYTES_TO_READ = 256;
      // Never try to read more bytes than the file contains. A truncated
      // file will then simply fail the header regex below and be reported
      // as corrupt, rather than blowing up while allocating the buffer.
      let headerBytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes);
      let headerBuffer = new Uint8Array(headerBytesToRead);
      syncReadFile.readBytesInto(headerBuffer, 0);

      let textDecoder = new TextDecoder();
      let decodedHeader = textDecoder.decode(headerBuffer);
      const EXPECTED_HEADER =
        /^<!DOCTYPE html>[\r\n]+<!-- Version: (\d+) -->[\r\n]+/;
      let headerMatches = decodedHeader.match(EXPECTED_HEADER);
      if (!headerMatches) {
        throw new BackupError(
          "Corrupt archive header",
          ERRORS.CORRUPTED_ARCHIVE
        );
      }

      let version = parseInt(headerMatches[1], 10);
      // In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is
      // where we could place migrations / handlers for older archive
      // versions.
      if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
        throw new BackupError(
          "Unsupported archive version: " + version,
          ERRORS.UNSUPPORTED_BACKUP_VERSION
        );
      }

      // Now we have to scan forward, looking for the INLINE_MIME_MARKER_START
      // and the Content-Type, which appears just before the MIME message.
      //
      // We scan by reading bytes into a buffer rather than reading in the
      // whole file, since the file could be quite large (100s of MB).
      let currentIndex = headerBuffer.byteLength;

      // We keep the old buffer around, and always join it with the buffer
      // that contains the recently read-in bytes. That way, we can account
      // for the possibility that the INLINE_MIME_START_MARKER and
      // Content-Type were only half-loaded in prior or current buffer.
      let oldBuffer = headerBuffer;
      let priorIndex = 0;
      // NOTE(review): this assumes INLINE_MIME_START_MARKER contains no
      // regex metacharacters - confirm against ArchiveUtils.
      const EXPECTED_MARKER = new RegExp(
        `${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
      );

      let textEncoder = new TextEncoder();
      while (currentIndex < totalBytes) {
        let bytesToRead = Math.min(
          MAX_BYTES_TO_READ,
          totalBytes - currentIndex
        );

        // This shouldn't happen, but better safe than sorry.
        if (bytesToRead <= 0) {
          throw new BackupError(
            "Failed to calculate the proper number of bytes to read: " +
              bytesToRead,
            ERRORS.UNKNOWN
          );
        }

        let buffer = new Uint8Array(bytesToRead);
        syncReadFile.readBytesInto(buffer, currentIndex);

        let combinedBuffer = new Uint8Array(
          oldBuffer.byteLength + buffer.byteLength
        );
        combinedBuffer.set(oldBuffer, 0);
        combinedBuffer.set(buffer, oldBuffer.byteLength);

        // Now we look for the inline MIME marker, and try to extract the
        // Content-Type for it.
        let decodedString = textDecoder.decode(combinedBuffer);
        let markerMatches = decodedString.match(EXPECTED_MARKER);

        if (markerMatches) {
          // If we found it, we want to find the byte index for the point
          // immediately after the match. You'd think we could use
          // decodedString.search for this, but unfortunately search returns
          // character indexes and not byte indexes (and Unicode characters,
          // which might be displayed in the markup of the page, are multiple
          // bytes long). To work around this, we use a TextEncoder to encode
          // everything leading up to the marker, and count the number of
          // bytes. Since the buffer may have cut through a multibyte
          // character, we also need to work around the workaround by
          // discounting undecoded characters (which TextDecoder replaces
          // with \uFFFD). Then we count the number of bytes in our match.
          // The sum of these two values, plus the priorIndex gives us the
          // byte index of the point right after our regular expression match
          // in a Unicode-character compatible way.
          //
          // This all presumes that the archive file was encoded as UTF-8.
          // Since we control the generation of this file, this is a safe
          // assumption.

          let match = markerMatches[0];
          let matchBytes = textEncoder.encode(match).byteLength;
          let matchIndex = decodedString.indexOf(match);

          let numberOfUndecodedCharacters =
            ArchiveUtils.countReplacementCharacters(decodedString);
          // Skip the undecoded characters at the start of the string,
          // if necessary
          let substringUpToMatch = decodedString.slice(
            numberOfUndecodedCharacters,
            matchIndex
          );
          let substringUpToMatchBytes =
            textEncoder.encode(substringUpToMatch).byteLength;

          startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
          contentType = markerMatches[1];
          break;
        }

        priorIndex = currentIndex;
        currentIndex += bytesToRead;
        oldBuffer = buffer;
      }
    } finally {
      // Release the file handle on every exit path, including the header
      // and version validation throws above.
      syncReadFile.close();
    }

    if (!contentType) {
      throw new BackupError(
        "Failed to find embedded data in archive",
        ERRORS.CORRUPTED_ARCHIVE
      );
    }

    return { startByteOffset, contentType };
  }

  /**
   * Implements the standard boilerplate to make this class work as a
   * PromiseWorker.
   */
  #connectToPromiseWorker() {
    this.#worker = new PromiseWorker.AbstractWorker();
    this.#worker.dispatch = (method, args = []) => {
      if (!this[method]) {
        throw new BackupError(
          "Method does not exist: " + method,
          ERRORS.INTERNAL_ERROR
        );
      }
      return this[method](...args);
    };
    this.#worker.close = () => self.close();
    this.#worker.postMessage = (message, ...transfers) => {
      self.postMessage(message, ...transfers);
    };

    self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
    self.addEventListener("message", msg => this.#worker.handleMessage(msg));
    self.addEventListener("unhandledrejection", function (error) {
      throw error.reason;
    });
  }
}

new ArchiveWorker();