tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

har-builder.js (19689B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 
      5 "use strict";
      6 
      7 const appInfo = Services.appinfo;
      8 const { LocalizationHelper } = require("resource://devtools/shared/l10n.js");
      9 const { CurlUtils } = require("resource://devtools/client/shared/curl.js");
     10 const {
     11  getFormDataSections,
     12  getUrlQuery,
     13  parseQueryString,
     14 } = require("resource://devtools/client/netmonitor/src/utils/request-utils.js");
     15 const {
     16  buildHarLog,
     17 } = require("resource://devtools/client/netmonitor/src/har/har-builder-utils.js");
     18 const L10N = new LocalizationHelper("devtools/client/locales/har.properties");
     19 const {
     20  TIMING_KEYS,
     21 } = require("resource://devtools/client/netmonitor/src/constants.js");
     22 
     23 /**
     24 * This object is responsible for building HAR file. See HAR spec:
     25 * https://dvcs.w3.org/hg/webperf/raw-file/tip/specs/HAR/Overview.html
     26 * http://www.softwareishard.com/blog/har-12-spec/
     27 */
     28 class HarBuilder {
     29  /**
     30   * @param {object} options
     31   *        configuration object
     32   * @param {boolean} options.connector
     33   *        Set to true to include HTTP response bodies in the result data
     34   *        structure.
     35   * @param {string} options.id
     36   *        ID of the exported page.
     37   * @param {boolean} options.includeResponseBodies
     38   *        Set to true to include HTTP response bodies in the result data
     39   *        structure.
     40   * @param {Array} options.items
     41   *        List of network events to be exported.
     42   * @param {boolean} options.supportsMultiplePages
     43   *        Set to true to create distinct page entries for each navigation.
     44   */
     45  constructor(options) {
     46    this._connector = options.connector;
     47    this._id = options.id;
     48    this._includeResponseBodies = options.includeResponseBodies;
     49    this._items = options.items;
     50    // Page id counter, only used when options.supportsMultiplePages is true.
     51    this._pageId = options.supportsMultiplePages ? 0 : options.id;
     52    this._pageMap = [];
     53    this._supportsMultiplePages = options.supportsMultiplePages;
     54    this._url = this._connector.currentTarget.url;
     55  }
     56 
     57  // Public API
     58 
     59  /**
     60   * This is the main method used to build the entire result HAR data.
     61   * The process is asynchronous since it can involve additional RDP
     62   * communication (e.g. resolving long strings).
     63   *
     64   * @returns {Promise} A promise that resolves to the HAR object when
     65   * the entire build process is done.
     66   */
     67  async build() {
     68    this.promises = [];
     69 
     70    // Build basic structure for data.
     71    const harLog = buildHarLog(appInfo);
     72 
     73    // Build pages.
     74    this.buildPages(harLog.log);
     75 
     76    // Build entries.
     77    for (const request of this._items) {
     78      const entry = await this.buildEntry(harLog.log, request);
     79      if (entry) {
     80        harLog.log.entries.push(entry);
     81      }
     82    }
     83 
     84    // Some data needs to be fetched from the backend during the
     85    // build process, so wait till all is done.
     86    await Promise.all(this.promises);
     87 
     88    return harLog;
     89  }
     90 
     91  // Helpers
     92  buildPages(log) {
     93    if (this._supportsMultiplePages) {
     94      this.buildPagesFromTargetTitles(log);
     95    } else if (this._items.length) {
     96      const firstRequest = this._items[0];
     97      const page = this.buildPage(this._url, firstRequest);
     98      log.pages.push(page);
     99      this._pageMap[this._id] = page;
    100    }
    101  }
    102 
    103  buildPagesFromTargetTitles(log) {
    104    // Retrieve the additional HAR data collected by the connector.
    105    const { initialURL, navigationRequests } = this._connector.getHarData();
    106    const firstNavigationRequest = navigationRequests[0];
    107    const firstRequest = this._items[0];
    108 
    109    if (
    110      !firstNavigationRequest ||
    111      firstRequest.resourceId !== firstNavigationRequest.resourceId
    112    ) {
    113      // If the first request is not a navigation request, it must be related
    114      // to the initial page. Create a first page entry for such early requests.
    115      const initialPage = this.buildPage(initialURL, firstRequest);
    116      log.pages.push(initialPage);
    117    }
    118 
    119    for (const request of navigationRequests) {
    120      const page = this.buildPage(request.url, request);
    121      log.pages.push(page);
    122    }
    123  }
    124 
    125  buildPage(url, networkEvent) {
    126    const page = {};
    127 
    128    page.id = "page_" + this._pageId;
    129    page.pageTimings = this.buildPageTimings(page, networkEvent);
    130    page.startedDateTime = dateToHarString(new Date(networkEvent.startedMs));
    131 
    132    // To align with other existing implementations of HAR exporters, the title
    133    // should contain the page URL and not the page title.
    134    page.title = url;
    135 
    136    // Increase the pageId, for upcoming calls to buildPage.
    137    // If supportsMultiplePages is disabled this method is only called once.
    138    this._pageId++;
    139 
    140    return page;
    141  }
    142 
    143  getPage(log, entry) {
    144    const existingPage = log.pages.findLast(
    145      ({ startedDateTime }) => startedDateTime <= entry.startedDateTime
    146    );
    147 
    148    if (!existingPage) {
    149      throw new Error(
    150        "Could not find a page for request: " + entry.request.url
    151      );
    152    }
    153 
    154    return existingPage;
    155  }
    156 
    157  async buildEntry(log, networkEvent) {
    158    const entry = {};
    159    entry.startedDateTime = dateToHarString(new Date(networkEvent.startedMs));
    160 
    161    let { eventTimings, id } = networkEvent;
    162    try {
    163      if (!eventTimings && this._connector.requestData) {
    164        eventTimings = await this._connector.requestData(id, "eventTimings");
    165      }
    166 
    167      entry.request = await this.buildRequest(networkEvent);
    168      entry.response = await this.buildResponse(networkEvent);
    169      entry.cache = await this.buildCache(networkEvent);
    170    } catch (e) {
    171      // Ignore any request for which we can't retrieve lazy data
    172      // The request has most likely been destroyed on the server side,
    173      // either because persist is disabled or the request's target/WindowGlobal/process
    174      // has been destroyed.
    175      console.warn("HAR builder failed on", networkEvent.url, e, e.stack);
    176      return null;
    177    }
    178    entry.timings = eventTimings ? eventTimings.timings : {};
    179 
    180    // Calculate total time by summing all timings. Note that
    181    // `networkEvent.totalTime` can't be used since it doesn't have to
    182    // correspond to plain summary of individual timings.
    183    // With TCP Fast Open and TLS early data sending data can
    184    // start at the same time as connect (we can send data on
    185    // TCP syn packet). Also TLS handshake can carry application
    186    // data thereby overlapping a sending data period and TLS
    187    // handshake period.
    188    entry.time = TIMING_KEYS.reduce((sum, type) => {
    189      const time = entry.timings[type];
    190      return typeof time != "undefined" && time != -1 ? sum + time : sum;
    191    }, 0);
    192 
    193    // Security state isn't part of HAR spec, and so create
    194    // custom field that needs to use '_' prefix.
    195    entry._securityState = networkEvent.securityState;
    196 
    197    if (networkEvent.remoteAddress) {
    198      entry.serverIPAddress = networkEvent.remoteAddress;
    199    }
    200 
    201    if (networkEvent.remotePort) {
    202      entry.connection = networkEvent.remotePort + "";
    203    }
    204 
    205    const page = this.getPage(log, entry);
    206    entry.pageref = page.id;
    207 
    208    return entry;
    209  }
    210 
    211  buildPageTimings() {
    212    // Event timing info isn't available
    213    const timings = {
    214      onContentLoad: -1,
    215      onLoad: -1,
    216    };
    217 
    218    // TODO: This method currently ignores the networkEvent and always retrieves
    219    // the same timing markers for all pages. Seee Bug 1833806.
    220    if (this._connector.getTimingMarker) {
    221      timings.onContentLoad = this._connector.getTimingMarker(
    222        "firstDocumentDOMContentLoadedTimestamp"
    223      );
    224      timings.onLoad = this._connector.getTimingMarker(
    225        "firstDocumentLoadTimestamp"
    226      );
    227    }
    228 
    229    return timings;
    230  }
    231 
    232  async buildRequest(networkEvent) {
    233    // When using HarAutomation, HarCollector will automatically fetch requestHeaders
    234    // and requestCookies, but when we use it from netmonitor, FirefoxDataProvider
    235    // should fetch it itself lazily, via requestData.
    236    let { id, requestHeaders } = networkEvent;
    237    if (!requestHeaders && this._connector.requestData) {
    238      requestHeaders = await this._connector.requestData(id, "requestHeaders");
    239    }
    240 
    241    let { requestCookies } = networkEvent;
    242    if (!requestCookies && this._connector.requestData) {
    243      requestCookies = await this._connector.requestData(id, "requestCookies");
    244    }
    245 
    246    const request = {
    247      bodySize: 0,
    248    };
    249    request.method = networkEvent.method;
    250    request.url = networkEvent.url;
    251    request.httpVersion = networkEvent.httpVersion || "";
    252    request.headers = this.buildHeaders(requestHeaders);
    253    request.headers = this.appendHeadersPostData(request.headers, networkEvent);
    254    request.cookies = this.buildCookies(requestCookies);
    255    request.queryString = parseQueryString(getUrlQuery(networkEvent.url)) || [];
    256    request.headersSize = requestHeaders.headersSize;
    257    request.postData = await this.buildPostData(networkEvent);
    258 
    259    if (request.postData?.text) {
    260      request.bodySize = request.postData.text.length;
    261    }
    262 
    263    return request;
    264  }
    265 
    266  /**
    267   * Fetch all header values from the backend (if necessary) and
    268   * build the result HAR structure.
    269   *
    270   * @param {object} input Request or response header object.
    271   */
    272  buildHeaders(input) {
    273    if (!input) {
    274      return [];
    275    }
    276 
    277    return this.buildNameValuePairs(input.headers);
    278  }
    279 
    280  appendHeadersPostData(input = [], networkEvent) {
    281    if (!networkEvent.requestPostData) {
    282      return input;
    283    }
    284 
    285    this.fetchData(networkEvent.requestPostData.postData.text).then(value => {
    286      const multipartHeaders = CurlUtils.getHeadersFromMultipartText(value);
    287      for (const header of multipartHeaders) {
    288        input.push(header);
    289      }
    290    });
    291 
    292    return input;
    293  }
    294 
    295  buildCookies(input) {
    296    if (!input) {
    297      return [];
    298    }
    299 
    300    return this.buildNameValuePairs(input.cookies || input);
    301  }
    302 
    303  buildNameValuePairs(entries) {
    304    const result = [];
    305 
    306    // HAR requires headers array to be presented, so always
    307    // return at least an empty array.
    308    if (!entries) {
    309      return result;
    310    }
    311 
    312    // Make sure header values are fully fetched from the server.
    313    entries.forEach(entry => {
    314      this.fetchData(entry.value).then(value => {
    315        result.push({
    316          name: entry.name,
    317          value,
    318        });
    319      });
    320    });
    321 
    322    return result;
    323  }
    324 
    325  async buildPostData(networkEvent) {
    326    // When using HarAutomation, HarCollector will automatically fetch requestPostData
    327    // and requestHeaders, but when we use it from netmonitor, FirefoxDataProvider
    328    // should fetch it itself lazily, via requestData.
    329    let { id, requestHeaders, requestPostData } = networkEvent;
    330    let requestHeadersFromUploadStream;
    331 
    332    if (!requestPostData && this._connector.requestData) {
    333      requestPostData = await this._connector.requestData(
    334        id,
    335        "requestPostData"
    336      );
    337      requestHeadersFromUploadStream = requestPostData.uploadHeaders;
    338    }
    339 
    340    if (!requestPostData.postData.text) {
    341      return undefined;
    342    }
    343 
    344    if (!requestHeaders && this._connector.requestData) {
    345      requestHeaders = await this._connector.requestData(id, "requestHeaders");
    346    }
    347 
    348    const postData = {
    349      mimeType: findValue(requestHeaders.headers, "content-type"),
    350      params: [],
    351      text: requestPostData.postData.text,
    352    };
    353 
    354    if (requestPostData.postDataDiscarded) {
    355      postData.comment = L10N.getStr("har.requestBodyNotIncluded");
    356      return postData;
    357    }
    358 
    359    // If we are dealing with URL encoded body, parse parameters.
    360    if (
    361      CurlUtils.isUrlEncodedRequest({
    362        headers: requestHeaders.headers,
    363        postDataText: postData.text,
    364      })
    365    ) {
    366      postData.mimeType = "application/x-www-form-urlencoded";
    367      // Extract form parameters and produce nice HAR array.
    368      const formDataSections = await getFormDataSections(
    369        requestHeaders,
    370        requestHeadersFromUploadStream,
    371        requestPostData,
    372        this._connector.getLongString
    373      );
    374 
    375      formDataSections.forEach(section => {
    376        const paramsArray = parseQueryString(section);
    377        if (paramsArray) {
    378          postData.params = [...postData.params, ...paramsArray];
    379        }
    380      });
    381    }
    382 
    383    return postData;
    384  }
    385 
    386  async buildResponse(networkEvent) {
    387    // When using HarAutomation, HarCollector will automatically fetch responseHeaders
    388    // and responseCookies, but when we use it from netmonitor, FirefoxDataProvider
    389    // should fetch it itself lazily, via requestData.
    390 
    391    let { id, responseCookies, responseHeaders } = networkEvent;
    392    if (!responseHeaders && this._connector.requestData) {
    393      responseHeaders = await this._connector.requestData(
    394        id,
    395        "responseHeaders"
    396      );
    397    }
    398 
    399    if (!responseCookies && this._connector.requestData) {
    400      responseCookies = await this._connector.requestData(
    401        id,
    402        "responseCookies"
    403      );
    404    }
    405 
    406    const response = {
    407      status: 0,
    408    };
    409 
    410    // Arbitrary value if it's aborted to make sure status has a number
    411    if (networkEvent.status) {
    412      response.status = parseInt(networkEvent.status, 10);
    413    }
    414    response.statusText = networkEvent.statusText || "";
    415    response.httpVersion = networkEvent.httpVersion || "";
    416 
    417    response.headers = this.buildHeaders(responseHeaders);
    418    response.cookies = this.buildCookies(responseCookies);
    419    response.content = await this.buildContent(networkEvent);
    420 
    421    const headers = responseHeaders ? responseHeaders.headers : null;
    422    const headersSize = responseHeaders ? responseHeaders.headersSize : -1;
    423 
    424    response.redirectURL = findValue(headers, "Location");
    425    response.headersSize = headersSize;
    426 
    427    // 'bodySize' is size of the received response body in bytes.
    428    // Set to zero in case of responses coming from the cache (304).
    429    // Set to -1 if the info is not available.
    430    if (typeof networkEvent.transferredSize != "number") {
    431      response.bodySize = response.status == 304 ? 0 : -1;
    432    } else {
    433      response.bodySize = networkEvent.transferredSize;
    434    }
    435 
    436    return response;
    437  }
    438 
    439  async buildContent(networkEvent) {
    440    const content = {
    441      mimeType: networkEvent.mimeType,
    442      size: -1,
    443    };
    444 
    445    // When using HarAutomation, HarCollector will automatically fetch responseContent,
    446    // but when we use it from netmonitor, FirefoxDataProvider should fetch it itself
    447    // lazily, via requestData.
    448    let { responseContent } = networkEvent;
    449    if (!responseContent && this._connector.requestData) {
    450      responseContent = await this._connector.requestData(
    451        networkEvent.id,
    452        "responseContent"
    453      );
    454    }
    455    if (responseContent?.content) {
    456      content.size = responseContent.content.size;
    457      content.encoding = responseContent.content.encoding;
    458    }
    459 
    460    const includeBodies = this._includeResponseBodies;
    461    const contentDiscarded = responseContent
    462      ? responseContent.contentDiscarded
    463      : false;
    464 
    465    // The comment is appended only if the response content
    466    // is explicitly discarded.
    467    if (!includeBodies || contentDiscarded) {
    468      content.comment = L10N.getStr("har.responseBodyNotIncluded");
    469      return content;
    470    }
    471 
    472    if (responseContent) {
    473      const { text } = responseContent.content;
    474      this.fetchData(text).then(value => {
    475        content.text = value;
    476      });
    477    }
    478 
    479    return content;
    480  }
    481 
    482  async buildCache(networkEvent) {
    483    const cache = {};
    484 
    485    // if resource has changed, return early
    486    if (networkEvent.status != "304") {
    487      return cache;
    488    }
    489 
    490    if (networkEvent.responseCacheAvailable && this._connector.requestData) {
    491      const responseCache = await this._connector.requestData(
    492        networkEvent.id,
    493        "responseCache"
    494      );
    495      if (responseCache.cache) {
    496        cache.afterRequest = this.buildCacheEntry(responseCache.cache);
    497      }
    498    } else if (networkEvent.responseCache?.cache) {
    499      cache.afterRequest = this.buildCacheEntry(
    500        networkEvent.responseCache.cache
    501      );
    502    } else {
    503      cache.afterRequest = null;
    504    }
    505 
    506    return cache;
    507  }
    508 
    509  buildCacheEntry(cacheEntry) {
    510    const cache = {};
    511 
    512    if (typeof cacheEntry !== "undefined") {
    513      cache.expires = findKeys(cacheEntry, ["expirationTime", "expires"]);
    514      cache.lastFetched = findKeys(cacheEntry, ["lastFetched"]);
    515 
    516      // TODO: eTag support
    517      // Har format expects cache entries to provide information about eTag,
    518      // however this is not currently exposed on nsICacheEntry.
    519      // This should be stored under cache.eTag. See Bug 1799844.
    520      cache.fetchCount = findKeys(cacheEntry, ["fetchCount"]);
    521 
    522      // har-importer.js, along with other files, use buildCacheEntry
    523      // initial value comes from properties without underscores.
    524      // this checks for both in appropriate order.
    525      cache._dataSize = findKeys(cacheEntry, ["storageDataSize", "_dataSize"]);
    526      cache._lastModified = findKeys(cacheEntry, [
    527        "lastModified",
    528        "_lastModified",
    529      ]);
    530      cache._device = findKeys(cacheEntry, ["deviceID", "_device"]);
    531    }
    532 
    533    return cache;
    534  }
    535 
    536  // RDP Helpers
    537  fetchData(string) {
    538    const promise = this._connector.getLongString(string).then(value => {
    539      return value;
    540    });
    541 
    542    // Building HAR is asynchronous and not done till all
    543    // collected promises are resolved.
    544    this.promises.push(promise);
    545 
    546    return promise;
    547  }
    548 }
    549 
    550 // Helpers
    551 
    552 /**
    553 * Find specified keys within an object.
    554 * Searches object for keys passed in, returns first value returned,
    555 * or an empty string.
    556 *
    557 * @param obj (object)
    558 * @param keys (array)
    559 * @returns {string}
    560 */
    561 function findKeys(obj, keys) {
    562  if (!keys) {
    563    return "";
    564  }
    565 
    566  const keyFound = keys.filter(key => obj[key]);
    567  if (!keys.length) {
    568    return "";
    569  }
    570 
    571  const value = obj[keyFound[0]];
    572  if (typeof value === "undefined" || typeof value === "object") {
    573    return "";
    574  }
    575 
    576  return String(value);
    577 }
    578 
    579 /**
    580 * Find specified value within an array of name-value pairs
    581 * (used for headers, cookies and cache entries)
    582 */
    583 function findValue(arr, name) {
    584  if (!arr) {
    585    return "";
    586  }
    587 
    588  name = name.toLowerCase();
    589  const result = arr.find(entry => entry.name.toLowerCase() == name);
    590  return result ? result.value : "";
    591 }
    592 
    593 /**
    594 * Generate HAR representation of a date.
    595 * (YYYY-MM-DDThh:mm:ss.sTZD, e.g. 2009-07-24T19:20:30.45+01:00)
    596 * See also HAR Schema: http://janodvarko.cz/har/viewer/
    597 *
    598 * Note: it would be great if we could utilize Date.toJSON(), but
    599 * it doesn't return proper time zone offset.
    600 *
    601 * An example:
    602 * This helper returns:    2015-05-29T16:10:30.424+02:00
    603 * Date.toJSON() returns:  2015-05-29T14:10:30.424Z
    604 *
    605 * @param date {Date} The date object we want to convert.
    606 */
    607 function dateToHarString(date) {
    608  function f(n, c) {
    609    if (!c) {
    610      c = 2;
    611    }
    612    let s = String(n);
    613    while (s.length < c) {
    614      s = "0" + s;
    615    }
    616    return s;
    617  }
    618 
    619  const result =
    620    date.getFullYear() +
    621    "-" +
    622    f(date.getMonth() + 1) +
    623    "-" +
    624    f(date.getDate()) +
    625    "T" +
    626    f(date.getHours()) +
    627    ":" +
    628    f(date.getMinutes()) +
    629    ":" +
    630    f(date.getSeconds()) +
    631    "." +
    632    f(date.getMilliseconds(), 3);
    633 
    634  let offset = date.getTimezoneOffset();
    635  const positive = offset > 0;
    636 
    637  // Convert to positive number before using Math.floor (see issue 5512)
    638  offset = Math.abs(offset);
    639  const offsetHours = Math.floor(offset / 60);
    640  const offsetMinutes = Math.floor(offset % 60);
    641  const prettyOffset =
    642    (positive > 0 ? "-" : "+") + f(offsetHours) + ":" + f(offsetMinutes);
    643 
    644  return result + prettyOffset;
    645 }
    646 
    647 // Exports from this module
    648 exports.HarBuilder = HarBuilder;