tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

Archive.worker.mjs (16214B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 import { PromiseWorker } from "resource://gre/modules/workers/PromiseWorker.mjs";
      6 
      7 // The ArchiveUtils module is designed to be imported in both worker and
      8 // main thread contexts.
      9 /* eslint-disable mozilla/reject-import-system-module-from-non-system */
     10 import { ArchiveUtils } from "resource:///modules/backup/ArchiveUtils.sys.mjs";
     11 import { ArchiveEncryptor } from "resource:///modules/backup/ArchiveEncryption.sys.mjs";
     12 import { BackupError } from "resource:///modules/backup/BackupError.mjs";
     13 import { ERRORS } from "chrome://browser/content/backup/backup-constants.mjs";
     14 
     15 /**
     16 * An ArchiveWorker is a PromiseWorker that tries to do most of the heavy
     17 * lifting of dealing with single-file archives for backups, to avoid doing
     18 * much on the main thread. This is mostly important for single-file archive
     19 * _creation_, as this is supposed to occur silently in the background without
     20 * the user noticing any degradation in performance.
     21 */
     22 class ArchiveWorker {
     23  #worker = null;
     24 
     25  constructor() {
     26    // Connect the provider to the worker.
     27    this.#connectToPromiseWorker();
     28  }
     29 
     30  /**
     31   * Generates a boundary string that can be used to separate sections in a
     32   * multipart/mixed MIME message.
     33   *
     34   * See https://www.w3.org/Protocols/rfc1341/7_2_Multipart.html.
     35   *
     36   * @returns {string}
     37   */
     38  #generateBoundary() {
     39    return (
     40      "----=_Part_" +
     41      new Date().getTime() +
     42      "_" +
     43      Math.random().toString(36).slice(2, 12) +
     44      "_" +
     45      Math.random().toString(36).slice(2, 12)
     46    );
     47  }
     48 
     49  /**
     50   * Calculates how many base64 bytes will be generated from some number of
     51   * unencoded bytes. This presumes that the base64 bytes include a newline
     52   * terminator at the end.
     53   *
     54   * @param {number} bytes
     55   *   The number of bytes to be converted to base64.
     56   * @param {boolean} encrypting
     57   *   True if encryption via ArchiveEncryptor is being applied.
     58   * @returns {number}
     59   */
     60  #computeChunkBase64Bytes(bytes, encrypting) {
     61    if (encrypting) {
     62      bytes += ArchiveUtils.TAG_LENGTH_BYTES;
     63    }
     64 
     65    return 4 * Math.ceil(bytes / 3) + 1;
     66  }
     67 
     68  /**
     69   * @typedef {object} EncryptionArgs
     70   * @property {CryptoKey} publicKey
     71   *   The RSA-OAEP public key that will be used to derive keys for encrypting
     72   *   the backup.
     73   * @property {CryptoKey} backupAuthKey
     74   *   The AES-GCM key that will be used to authenticate the owner of the
     75   *   backup.
     76   * @property {Uint8Array} wrappedSecrets
     77   *   The encrypted backup secrets computed by ArchiveEncryptionState.
     78   * @property {Uint8Array} salt
     79   *   A salt computed for the PBKDF2 stretching of the recovery code.
     80   * @property {Uint8Array} nonce
     81   *   A nonce computed when wrapping the private key and OSKeyStore secret.
     82   */
     83 
     84  /**
     85   * Constructs a single-file archive for a backup on the filesystem. A
     86   * single-file archive is a specially crafted HTML document that includes,
     87   * among other things, an inlined multipart/mixed MIME message within a
     88   * document comment.
     89   *
     90   * @param {object} params
     91   *   Arguments that are described in more detail below.
     92   * @param {string} params.archivePath
     93   *   The path on the file system to write the single-file archive.
     94   * @param {string} params.markup
     95   *   The HTML markup to insert into the archive file before the HTML
     96   *   comment block. This is the markup that will be rendered if the HTML
     97   *   file is opened in a web browser.
     98   * @param {object} params.backupMetadata
     99   *   The metadata associated with this backup. This is a copy of the metadata
    100   *   object that is contained within the compressed backups' manifest.
    101   * @param {string} params.compressedBackupSnapshotPath
    102   *   The path on the file system where the compressed backup file is located.
    103   * @param {EncryptionArgs} [params.encryptionArgs=undefined]
    104   *   Optional EncryptionArgs, which will be used to encrypt this archive.
    105   * @param {number} params.chunkSize
    106   *   The size of the chunks to break the byte stream into for encoding.
    107   * @returns {Promise<undefined>}
    108   */
    109  async constructArchive({
    110    archivePath,
    111    markup,
    112    backupMetadata,
    113    compressedBackupSnapshotPath,
    114    encryptionArgs,
    115    chunkSize,
    116  }) {
    117    let encryptor = null;
    118    if (encryptionArgs) {
    119      encryptor = await ArchiveEncryptor.initialize(
    120        encryptionArgs.publicKey,
    121        encryptionArgs.backupAuthKey
    122      );
    123    }
    124 
    125    let boundary = this.#generateBoundary();
    126 
    127    let jsonBlock;
    128    if (encryptor) {
    129      jsonBlock = await encryptor.confirm(
    130        backupMetadata,
    131        encryptionArgs.wrappedSecrets,
    132        encryptionArgs.salt,
    133        encryptionArgs.nonce
    134      );
    135    } else {
    136      jsonBlock = {
    137        version: ArchiveUtils.SCHEMA_VERSION,
    138        encConfig: null,
    139        meta: backupMetadata,
    140      };
    141    }
    142 
    143    let serializedJsonBlock = JSON.stringify(jsonBlock);
    144    let textEncoder = new TextEncoder();
    145    let jsonBlockLength = textEncoder.encode(serializedJsonBlock).length;
    146 
    147    // Once we get the ability to stream to the filesystem from IOUtils in a
    148    // worker, we should use that instead of appending each of these chunks.
    149    //
    150    // This isn't supposed to be some kind of generalized MIME message
    151    // generator, so we're happy to construct it by hand here.
    152    await IOUtils.writeUTF8(archivePath, markup);
    153    await IOUtils.writeUTF8(
    154      archivePath,
    155      `
    156 ${ArchiveUtils.INLINE_MIME_START_MARKER}
    157 Content-Type: multipart/mixed; boundary="${boundary}"
    158 
    159 --${boundary}
    160 Content-Type: application/json; charset=utf-8
    161 Content-Disposition: attachment; filename="archive.json"
    162 Content-Length: ${jsonBlockLength}
    163 
    164 ${JSON.stringify(jsonBlock)}
    165 `,
    166      { mode: "append" }
    167    );
    168 
    169    let compressedBackupSnapshotFile = IOUtils.openFileForSyncReading(
    170      compressedBackupSnapshotPath
    171    );
    172    let totalBytesToRead = compressedBackupSnapshotFile.size;
    173 
    174    // To calculate the Content-Length of the base64 block, we start by
    175    // computing how many newlines we'll be adding...
    176    let totalNewlines = Math.ceil(totalBytesToRead / chunkSize);
    177 
    178    // Next, we determine how many full-sized chunks of chunkSize we'll be
    179    // using, and multiply that by the number of base64 bytes that such a chunk
    180    // will require.
    181    let fullSizeChunks = totalNewlines - 1;
    182    let fullSizeChunkBase64Bytes = this.#computeChunkBase64Bytes(
    183      chunkSize,
    184      !!encryptor
    185    );
    186    let totalBase64Bytes = fullSizeChunks * fullSizeChunkBase64Bytes;
    187 
    188    // Finally, if there are any leftover bytes that are less than chunkSize,
    189    // determine how many bytes those will require, and add it to our total.
    190    let leftoverChunkBytes = totalBytesToRead % chunkSize;
    191    if (leftoverChunkBytes) {
    192      totalBase64Bytes += this.#computeChunkBase64Bytes(
    193        leftoverChunkBytes,
    194        !!encryptor
    195      );
    196    } else {
    197      // We divided perfectly by chunkSize, so add another
    198      // fullSizeChunkBase64Bytes to the total.
    199      totalBase64Bytes += fullSizeChunkBase64Bytes;
    200    }
    201 
    202    await IOUtils.writeUTF8(
    203      archivePath,
    204      `--${boundary}
    205 Content-Type: application/octet-stream
    206 Content-Disposition: attachment; filename="archive.zip"
    207 Content-Transfer-Encoding: base64
    208 Content-Length: ${totalBase64Bytes}
    209 
    210 `,
    211      { mode: "append" }
    212    );
    213 
    214    // And now we read in the bytes of the compressed file, base64 encode them,
    215    // and append them to the document. Down the line, this is also where
    216    // encryption will be done.
    217    let currentIndex = 0;
    218    while (currentIndex < totalBytesToRead) {
    219      let bytesToRead = Math.min(chunkSize, totalBytesToRead - currentIndex);
    220      if (bytesToRead <= 0) {
    221        throw new BackupError(
    222          "Failed to calculate the right number of bytes to read.",
    223          ERRORS.FILE_SYSTEM_ERROR
    224        );
    225      }
    226 
    227      let buffer = new Uint8Array(bytesToRead);
    228      compressedBackupSnapshotFile.readBytesInto(buffer, currentIndex);
    229 
    230      let bytesToWrite;
    231 
    232      if (encryptor) {
    233        let isLastChunk = bytesToRead < chunkSize;
    234        bytesToWrite = await encryptor.encrypt(buffer, isLastChunk);
    235      } else {
    236        bytesToWrite = buffer;
    237      }
    238 
    239      // We're very intentionally newline-separating these blocks here, as
    240      // these blocks may have been run through encryption, and the same blocks
    241      // must be run through decryption to unpack the archive.
    242      // Newline-separation makes it easier to identify and manage these blocks.
    243      await IOUtils.writeUTF8(
    244        archivePath,
    245        ArchiveUtils.arrayToBase64(bytesToWrite) + "\n",
    246        {
    247          mode: "append",
    248        }
    249      );
    250 
    251      currentIndex += bytesToRead;
    252    }
    253 
    254    await IOUtils.writeUTF8(
    255      archivePath,
    256      `
    257 --${boundary}
    258 ${ArchiveUtils.INLINE_MIME_END_MARKER}
    259 `,
    260      { mode: "append" }
    261    );
    262 
    263    compressedBackupSnapshotFile.close();
    264 
    265    return true;
    266  }
    267 
    268  /**
    269   * @typedef {object} ArchiveHeaderResult
    270   * @property {string} contentType
    271   *   The value of the Content-Type for the inlined MIME message.
    272   * @property {number} startByteOffset
    273   *   The byte offset within the archive file where the inlined MIME message
    274   *   begins.
    275   */
    276 
    277  /**
    278   * Given a path to a single-file archive HTML file, this method will sniff
    279   * the header of the file to make sure it matches one that we support. If
    280   * successful, it will resolve with the contentType of the inline MIME
    281   * message, as well as the byte offset for which the start of the inlined MIME
    282   * message can be read from.
    283   *
    284   * @param {string} archivePath
    285   *   The path to a single-file archive HTML file.
    286   * @returns {Promise<ArchiveHeaderResult, Error>}
    287   */
    288  parseArchiveHeader(archivePath) {
    289    // We expect the first bytes of the file to indicate that this is an HTML5
    290    // file and to give us a version number we can handle.
    291    let syncReadFile = IOUtils.openFileForSyncReading(archivePath);
    292    let totalBytes = syncReadFile.size;
    293 
    294    // This seems like a reasonable minimum number of bytes to read in to get
    295    // at the header. If the header data isn't in there, then it's a corrupt
    296    // file.
    297    const MAX_BYTES_TO_READ = 256;
    298    let headerBytesToRead = Math.min(
    299      MAX_BYTES_TO_READ,
    300      totalBytes - MAX_BYTES_TO_READ
    301    );
    302    let headerBuffer = new Uint8Array(headerBytesToRead);
    303    syncReadFile.readBytesInto(headerBuffer, 0);
    304 
    305    let textDecoder = new TextDecoder();
    306    let decodedHeader = textDecoder.decode(headerBuffer);
    307    const EXPECTED_HEADER =
    308      /^<!DOCTYPE html>[\r\n]+<!-- Version: (\d+) -->[\r\n]+/;
    309    let headerMatches = decodedHeader.match(EXPECTED_HEADER);
    310    if (!headerMatches) {
    311      throw new BackupError("Corrupt archive header", ERRORS.CORRUPTED_ARCHIVE);
    312    }
    313 
    314    let version = parseInt(headerMatches[1], 10);
    315    // In the future, if we ever bump the ARCHIVE_FILE_VERSION, this is where we
    316    // could place migrations / handlers for older archive versions.
    317    if (version != ArchiveUtils.ARCHIVE_FILE_VERSION) {
    318      throw new BackupError(
    319        "Unsupported archive version: " + version,
    320        ERRORS.UNSUPPORTED_BACKUP_VERSION
    321      );
    322    }
    323 
    324    // Now we have to scan forward, looking for the INLINE_MIME_MARKER_START
    325    // and the Content-Type, which appears just before the MIME message.
    326    //
    327    // We scan by reading bytes into a buffer rather than reading in the whole
    328    // file, since the file could be quite large (100s of MB).
    329    let currentIndex = headerBuffer.byteLength;
    330 
    331    let startByteOffset = 0;
    332    // We keep the old buffer around, and always join it with the buffer that
    333    // contains the recently read-in bytes. That way, we can account for the
    334    // possibility that the INLINE_MIME_START_MARKER and Content-Type were
    335    // only half-loaded in prior or current buffer.
    336    let oldBuffer = headerBuffer;
    337    let priorIndex = 0;
    338    let contentType = null;
    339    const EXPECTED_MARKER = new RegExp(
    340      `${ArchiveUtils.INLINE_MIME_START_MARKER}\nContent-Type: (.+)\n\n`
    341    );
    342 
    343    let textEncoder = new TextEncoder();
    344    while (currentIndex < totalBytes) {
    345      let bytesToRead = Math.min(MAX_BYTES_TO_READ, totalBytes - currentIndex);
    346 
    347      // This shouldn't happen, but better safe than sorry.
    348      if (bytesToRead <= 0) {
    349        throw new BackupError(
    350          "Failed to calculate the proper number of bytes to read: " +
    351            bytesToRead,
    352          ERRORS.UNKNOWN
    353        );
    354      }
    355 
    356      let buffer = new Uint8Array(bytesToRead);
    357      syncReadFile.readBytesInto(buffer, currentIndex);
    358 
    359      let combinedBuffer = new Uint8Array(
    360        oldBuffer.byteLength + buffer.byteLength
    361      );
    362      combinedBuffer.set(oldBuffer, 0);
    363      combinedBuffer.set(buffer, oldBuffer.byteLength);
    364 
    365      // Now we look for the inline MIME marker, and try to extract the
    366      // Content-Type for it.
    367      let decodedString = textDecoder.decode(combinedBuffer);
    368      let markerMatches = decodedString.match(EXPECTED_MARKER);
    369 
    370      if (markerMatches) {
    371        // If we found it, we want to find the byte index for the point
    372        // immediately after the match. You'd think we could use
    373        // decodedString.search for this, but unfortunately search returns
    374        // character indexes and not byte indexes (and Unicode characters,
    375        // which might be displayed in the markup of the page, are multiple
    376        // bytes long). To work around this, we use a TextEncoder to encode
    377        // everything leading up to the marker, and count the number of bytes.
    378        // Since the buffer may have cut through a multibyte character, we
    379        // also need to work around the workaround by discounting undecoded
    380        // characters (which TextDecoder replaces with �).Then we count the
    381        // number of bytes in our match. The sum of these two values, plus
    382        // the priorIndex gives us the byte index of the point right after
    383        // our regular expression match in a Unicode-character compatible way.
    384        //
    385        // This all presumes that the archive file was encoded as UTF-8. Since
    386        // we control the generation of this file, this is a safe assumption.
    387 
    388        let match = markerMatches[0];
    389        let matchBytes = textEncoder.encode(match).byteLength;
    390        let matchIndex = decodedString.indexOf(match);
    391 
    392        let numberOfUndecodedCharacters =
    393          ArchiveUtils.countReplacementCharacters(decodedString);
    394        // Skip the undecoded characters at the start of the string,
    395        // if necessary
    396        let substringUpToMatch = decodedString.slice(
    397          numberOfUndecodedCharacters,
    398          matchIndex
    399        );
    400        let substringUpToMatchBytes =
    401          textEncoder.encode(substringUpToMatch).byteLength;
    402 
    403        startByteOffset = priorIndex + substringUpToMatchBytes + matchBytes;
    404        contentType = markerMatches[1];
    405        break;
    406      }
    407 
    408      priorIndex = currentIndex;
    409      currentIndex += bytesToRead;
    410      oldBuffer = buffer;
    411    }
    412 
    413    syncReadFile.close();
    414 
    415    if (!contentType) {
    416      throw new BackupError(
    417        "Failed to find embedded data in archive",
    418        ERRORS.CORRUPTED_ARCHIVE
    419      );
    420    }
    421 
    422    return { startByteOffset, contentType };
    423  }
    424 
    425  /**
    426   * Implements the standard boilerplate to make this class work as a
    427   * PromiseWorker.
    428   */
    429  #connectToPromiseWorker() {
    430    this.#worker = new PromiseWorker.AbstractWorker();
    431    this.#worker.dispatch = (method, args = []) => {
    432      if (!this[method]) {
    433        throw new BackupError(
    434          "Method does not exist: " + method,
    435          ERRORS.INTERNAL_ERROR
    436        );
    437      }
    438      return this[method](...args);
    439    };
    440    this.#worker.close = () => self.close();
    441    this.#worker.postMessage = (message, ...transfers) => {
    442      self.postMessage(message, ...transfers);
    443    };
    444 
    445    self.callMainThread = this.#worker.callMainThread.bind(this.#worker);
    446    self.addEventListener("message", msg => this.#worker.handleMessage(msg));
    447    self.addEventListener("unhandledrejection", function (error) {
    448      throw error.reason;
    449    });
    450  }
    451 }
    452 
    453 new ArchiveWorker();