tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

unicode-url.js (4274B)


      1 /* This Source Code Form is subject to the terms of the Mozilla Public
      2 * License, v. 2.0. If a copy of the MPL was not distributed with this
      3 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      4 "use strict";
      5 
// Handle to the IDN service, looked up once when this module is evaluated and
// shared by the helpers below (used for `convertToDisplayIDN`).
// NOTE(review): `Cc` and `Ci` are not defined in this file; presumably they are
// the XPCOM classes/interfaces shorthands supplied by the module loader —
// confirm against the loader configuration.
const idnService = Cc["@mozilla.org/network/idn-service;1"].getService(
  Ci.nsIIDNService
);
      9 
     10 /**
     11 * Gets a readble Unicode hostname from a hostname.
     12 *
     13 * If the `hostname` is a readable ASCII hostname, such as example.org, then
     14 * this function will simply return the original `hostname`.
     15 *
     16 * If the `hostname` is a Punycode hostname representing a Unicode domain name,
     17 * such as xn--g6w.xn--8pv, then this function will return the readable Unicode
     18 * domain name by decoding the Punycode hostname.
     19 *
     20 * @param {string}  hostname
     21 *                  the hostname from which the Unicode hostname will be
     22 *                  parsed, such as example.org, xn--g6w.xn--8pv.
     23 * @return {string} The Unicode hostname. It may be the same as the `hostname`
     24 *                  passed to this function if the `hostname` itself is
     25 *                  a readable ASCII hostname or a Unicode hostname.
     26 */
     27 function getUnicodeHostname(hostname) {
     28  return idnService.convertToDisplayIDN(hostname);
     29 }
     30 
     31 /**
     32 * Gets a readble Unicode URL pathname from a URL pathname.
     33 *
     34 * If the `urlPath` is a readable ASCII URL pathname, such as /a/b/c.js, then
     35 * this function will simply return the original `urlPath`.
     36 *
     37 * If the `urlPath` is a URI-encoded pathname, such as %E8%A9%A6/%E6%B8%AC.js,
     38 * then this function will return the readable Unicode pathname.
     39 *
     40 * If the `urlPath` is a malformed URL pathname, then this function will simply
     41 * return the original `urlPath`.
     42 *
     43 * @param {string}  urlPath
     44 *                  the URL path from which the Unicode URL path will be parsed,
     45 *                  such as /a/b/c.js, %E8%A9%A6/%E6%B8%AC.js.
     46 * @return {string} The Unicode URL Path. It may be the same as the `urlPath`
     47 *                  passed to this function if the `urlPath` itself is a readable
     48 *                  ASCII url or a Unicode url.
     49 */
     50 function getUnicodeUrlPath(urlPath) {
     51  try {
     52    return decodeURIComponent(urlPath);
     53  } catch (err) {}
     54  return urlPath;
     55 }
     56 
     57 /**
     58 * Gets a readable Unicode URL from a URL.
     59 *
     60 * If the `url` is a readable ASCII URL, such as http://example.org/a/b/c.js,
     61 * then this function will simply return the original `url`.
     62 *
     63 * If the `url` includes either an unreadable Punycode domain name or an
     64 * unreadable URI-encoded pathname, such as
     65 * http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js, then this function will return
     66 * the readable URL by decoding all its unreadable URL components to Unicode
     67 * characters. The character `#` is not decoded from escape sequences.
     68 *
     69 * If the `url` is a malformed URL, then this function will return the original
     70 * `url`.
     71 *
     72 * If the `url` is a data: URI, then this function will return the original
     73 * `url`.
     74 *
     75 * @param {string|URL}  url
     76 *                  the full URL, or a data: URI. from which the readable URL
     77 *                  will be parsed, such as, http://example.org/a/b/c.js,
     78 *                  http://xn--g6w.xn--8pv/%E8%A9%A6/%E6%B8%AC.js
     79 *                  Can also be an already parsed URL.
     80 * @return {string} The readable URL. It may be the same as the `url` passed to
     81 *                  this function if the `url` itself is readable.
     82 */
     83 function getUnicodeUrl(url) {
     84  try {
     85    let urlObject;
     86    if (typeof url === "string") {
     87      urlObject = new URL(url);
     88    } else {
     89      urlObject = url;
     90      url = urlObject.href;
     91    }
     92 
     93    const { protocol, hostname } = urlObject;
     94    if (protocol === "data:") {
     95      // Never convert a data: URI.
     96      return url;
     97    }
     98    const readableHostname = getUnicodeHostname(hostname);
     99 
    100    /* We use `decodeURIComponent` instead of decodeURI as the
    101     * later does not decode some characters, it only can decode characters
    102     * previously encoded by the encodeURI. See
    103     * https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI#Description
    104     */
    105    url = decodeURIComponent(url);
    106    return url.replace(hostname, readableHostname);
    107  } catch (err) {}
    108  return url;
    109 }
    110 
// Public API of this module: the three URL/hostname readability helpers
// defined above.
module.exports = {
  getUnicodeHostname,
  getUnicodeUrlPath,
  getUnicodeUrl,
};