tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

nsURLHelper.h (12961B)


      1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* This Source Code Form is subject to the terms of the Mozilla Public
      3 * License, v. 2.0. If a copy of the MPL was not distributed with this
      4 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      5 
      6 #ifndef nsURLHelper_h__
      7 #define nsURLHelper_h__
      8 
      9 #include "nsString.h"
     10 #include "nsTArray.h"
     11 #include "nsASCIIMask.h"
     12 #include <mozilla/Maybe.h>
     13 #include <mozilla/CompactPair.h>
     14 
     15 class nsIFile;
     16 class nsIURLParser;
     17 
     18 //----------------------------------------------------------------------------
     19 // This module contains some private helper functions related to URL parsing.
     20 //----------------------------------------------------------------------------
     21 
     22 /* Shuts down the URL helper module; this frees the URL parser(s). */
     23 void net_ShutdownURLHelper();
     24 
     25 /* access the URL parsers; each returns an already_AddRefed<nsIURLParser> */
     26 already_AddRefed<nsIURLParser> net_GetAuthURLParser();
     27 already_AddRefed<nsIURLParser> net_GetNoAuthURLParser();
     28 already_AddRefed<nsIURLParser> net_GetStdURLParser();
     29 
     30 /* Convert between an nsIFile and a file:// URL spec.
     31 * net_GetURLSpecFromFile does an extra stat, so callers should
     32 * prefer net_GetURLSpecFromActualFile or net_GetURLSpecFromDir
     33 * whenever possible. */
     34 nsresult net_GetURLSpecFromFile(nsIFile*, nsACString&);
     35 nsresult net_GetURLSpecFromDir(nsIFile*, nsACString&);
     36 nsresult net_GetURLSpecFromActualFile(nsIFile*, nsACString&);
     37 nsresult net_GetFileFromURLSpec(const nsACString&, nsIFile**);
     38 
     39 /* extract the directory, file base name and file extension from a file:// URL */
     40 nsresult net_ParseFileURL(const nsACString& inURL, nsACString& outDirectory,
     41                          nsACString& outFileBaseName,
     42                          nsACString& outFileExtension);
     43 
     44 // Handle ".." in dirs while resolving URLs (|path| is UTF-8, taken as char*).
     45 // Returns a Maybe wrapping the pair below (the Nothing case is not specified here):
     46 // (index of the last slash, index of the end of the basename)
     47 mozilla::Maybe<mozilla::CompactPair<uint32_t, uint32_t>> net_CoalesceDirs(
     48    char* path);
     49 
     50 /**
     51 * Check if a URL is absolute.
     52 *
     53 * @param uri       URL spec
     54 * @return true if the given spec represents an absolute URL
     55 */
     56 bool net_IsAbsoluteURL(const nsACString& uri);
     57 
     58 /**
     59 * Extract the URI scheme, if possible.
     60 *
     61 * @param inURI     URI spec
     62 * @param scheme    scheme copied to this buffer on return. Is lowercase.
     63 */
     64 nsresult net_ExtractURLScheme(const nsACString& inURI, nsACString& scheme);
     65 
     66 /* Check that the given scheme conforms to RFC 2396. */
     67 bool net_IsValidScheme(const nsACString& scheme);
     68 
     69 /**
     70 * Strips all C0 controls and spaces from the beginning and end of the URL
     71 * and filters \r, \n and \t out of the middle of it.  This makes it safe
     72 * to call on things like javascript: or data: URLs, where we may in fact
     73 * run into whitespace that is not properly encoded.
     74 *
     75 * @param input the URL spec we want to filter
     76 * @param result the out param to write to if filtering happens
     77 */
     78 void net_FilterURIString(const nsACString& input, nsACString& result);
     79 
     80 /**
     81 * This function performs character stripping just like net_FilterURIString,
     82 * with the added benefit of also performing percent escaping of disallowed
     83 * characters, all in one pass. Saving one pass is very important when operating
     84 * on really large strings.
     85 *
     86 * @param aInput the URL spec we want to filter
     87 * @param aFlags the flags which control which characters we escape
     88 * @param aFilterMask a mask of characters that should be excluded from the result
     89 * @param aResult the out param to write to if filtering happens
     90 */
     91 nsresult net_FilterAndEscapeURI(const nsACString& aInput, uint32_t aFlags,
     92                                const ASCIIMaskArray& aFilterMask,
     93                                nsACString& aResult);
     94 
     95 #if defined(XP_WIN)
     96 /**
     97 * On Win32 and OS/2 systems a back-slash in a file:// URL is equivalent to a
     98 * forward-slash.  This function maps any back-slashes to forward-slashes.
     99 *
    100 * @param aURL
    101 *        The URL string to normalize (UTF-8 encoded).  This can be a
    102 *        relative URL segment.
    103 * @param aResultBuf
    104 *        The resulting string is appended to this string.  If the input URL
    105 *        is already normalized, then aResultBuf is unchanged.
    106 *
    107 * @returns false if aURL is already normalized.  Otherwise, returns true.
    108 */
    109 bool net_NormalizeFileURL(const nsACString& aURL, nsCString& aResultBuf);
    110 #endif
    111 
    112 /*****************************************************************************
    113 * generic string routines follow (XXX move to someplace more generic).
    114 */
    115 
    116 /* convert |str| to lower case; the first overload takes an explicit length */
    117 void net_ToLowerCase(char* str, uint32_t length);
    118 void net_ToLowerCase(char* str);
    119 
    120 /**
    121 * returns pointer to first character in the given set, scanning from |iter|
    122 * towards |stop|.  if not found, then |stop| is returned.  stops prematurely
    123 * if a null byte is encountered, and returns the address of the null byte.
    124 */
    125 char* net_FindCharInSet(const char* iter, const char* stop, const char* set);
    126 
    127 /**
    128 * returns pointer to first character NOT in the given set, scanning from
    129 * |iter| towards |stop|.  if all characters are in the given set, then |stop|
    130 * is returned.  if '\0' is not included in |set|, then stops prematurely if a
    131 * null byte is encountered, and returns the address of the null byte.
    132 */
    133 char* net_FindCharNotInSet(const char* iter, const char* stop, const char* set);
    134 
    135 /**
    136 * returns pointer to last character of |str| NOT in the given set, where |str|
    137 * starts at |stop| and ends at |iter|.  if all characters are in the set, then |stop - 1| is returned.
    138 */
    139 char* net_RFindCharNotInSet(const char* stop, const char* iter,
    140                            const char* set);
    141 
    142 /**
    143 * Parses a content-type header and returns the content type and
    144 * charset (if any).  aContentCharset is not modified if no charset is
    145 * specified anywhere in aHeaderStr.  In that case (no charset
    146 * specified), *aHadCharset is set to false.  Otherwise, it's set to
    147 * true.  Note that aContentCharset can be empty even if *aHadCharset
    148 * is true.
    149 *
    150 * This parsing is suitable for HTTP requests.  Use net_ParseContentType
    151 * for parsing this header in HTTP responses.
    152 */
    153 void net_ParseRequestContentType(const nsACString& aHeaderStr,
    154                                 nsACString& aContentType,
    155                                 nsACString& aContentCharset,
    156                                 bool* aHadCharset);
    157 
    158 /**
    159 * Parses a content-type header and returns the content type and
    160 * charset (if any).  aContentCharset is not modified if no charset is
    161 * specified anywhere in aHeaderStr.  In that case (no charset
    162 * specified), *aHadCharset is set to false.  Otherwise, it's set to
    163 * true.  Note that aContentCharset can be empty even if *aHadCharset
    164 * is true.
    165 */
    166 void net_ParseContentType(const nsACString& aHeaderStr,
    167                          nsACString& aContentType, nsACString& aContentCharset,
    168                          bool* aHadCharset);
    169 /**
    170 * As the overload above, but also returns the start and end indexes for the
    171 * charset parameter in aHeaderStr through aCharsetStart and aCharsetEnd.  These
    172 * are indices for the entire parameter, NOT just the value.  If there is
    173 * "effectively" no charset parameter (e.g. if an earlier type with one is
    174 * overridden by a later type without one), *aHadCharset will be true but
    175 * *aCharsetStart will be set to -1.  It is possible to have aContentCharset
    176 * empty and *aHadCharset true when *aCharsetStart is nonnegative: charset="".
    177 */
    178 void net_ParseContentType(const nsACString& aHeaderStr,
    179                          nsACString& aContentType, nsACString& aContentCharset,
    180                          bool* aHadCharset, int32_t* aCharsetStart,
    181                          int32_t* aCharsetEnd);
    182 
    183 /* inline versions */
    184 /* NET_MAX_ADDRESS is the largest pointer value (UINTPTR_MAX, so 64-bit
    185 * platforms are covered); the forward-scanning overloads pass it as |stop|. */
    186 #define NET_MAX_ADDRESS (reinterpret_cast<char*>(UINTPTR_MAX))
    187 
    188 inline char* net_FindCharInSet(const char* str, const char* set) {
    189  return net_FindCharInSet(str, NET_MAX_ADDRESS, set);  // (iter, stop, set)
    190 }
    191 inline char* net_FindCharNotInSet(const char* str, const char* set) {
    192  return net_FindCharNotInSet(str, NET_MAX_ADDRESS, set);  // (iter, stop, set)
    193 }
    194 inline char* net_RFindCharNotInSet(const char* str, const char* set) {
    195  return net_RFindCharNotInSet(str, str + strlen(str), set);  // (stop, iter, set)
    196 }
    197 
    198 /**
    199 * Returns true if the hostname |host| does not include any restricted
    200 * characters.  Otherwise, false is returned.
    201 */
    202 bool net_IsValidDNSHost(const nsACString& host);
    203 
    204 /**
    205 * Checks whether |aAddr| is a valid IPv4 address according to RFC 3986 section 3.2.2.
    206 */
    207 bool net_IsValidIPv4Addr(const nsACString& aAddr);
    208 
    209 /**
    210 * Checks whether |aAddr| is a valid IPv6 address according to RFC 3986 section 3.2.2.
    211 */
    212 bool net_IsValidIPv6Addr(const nsACString& aAddr);
    213 
    214 /**
    215 * Returns, through |aOutText|, the default status text for HTTP status code
    216 * |aCode| (useful if HTTP2 does not provide one, for instance).
    217 */
    218 bool net_GetDefaultStatusTextForCode(uint16_t aCode, nsACString& aOutText);
    219 
    220 namespace mozilla {
    221 /**
    222 * Handles form-urlencoded query strings.
    223 *
    224 * Keeps an ordered list of name-value pairs and converts it from and to the
    225 * string representation.
    226 *
    227 * Static functions are provided as well for one-shot use cases.
    228 */
    229 class URLParams final {
    230 public:
    231  /**
    232   * \brief Parses a query string, invoking a handler for every name/value
    233   * pair. The handler can end processing early by returning false.
    234   *
    235   * \param aInput the query string to parse
    236   * \param aShouldDecode passed through unchanged to ParseNextInternal
    237   * \param aParamHandler the handler described above
    238   * \tparam ParamHandler a function type compatible with signature
    239   * bool(nsCString, nsCString)
    240   *
    241   * \return false if the handler returned false for any parameter,
    242   * true otherwise
    243   */
    244  template <typename ParamHandler>
    245  static bool Parse(const nsACString& aInput, bool aShouldDecode,
    246                    ParamHandler aParamHandler) {
    247    const char* const limit = aInput.EndReading();
    248    const char* cursor = aInput.BeginReading();
    249 
    250    // ParseNextInternal takes |cursor| by reference; this loop only ends
    251    // once the cursor has been moved up to |limit|.
    252    while (cursor != limit) {
    253      nsAutoCString key;
    254      nsAutoCString val;
    255 
    256      const bool parsed =
    257          ParseNextInternal(cursor, limit, aShouldDecode, &key, &val);
    258      if (parsed && !aParamHandler(std::move(key), std::move(val))) {
    259        return false;  // the handler asked to stop
    260      }
    261    }
    262    return true;
    263  }
    264 
    265  /**
    266   * \brief Looks up a single parameter by name in a query string and returns
    267   * its value.
    268   *
    269   * When several parameters share the name, the value of the first one is
    270   * returned.
    271   *
    272   * \param aInput the query string to parse
    273   * \param aName the name of the parameter to extract
    274   * \param[out] aValue receives the parameter value; set to void if there is
    275   * no match
    276   * \return true iff there was a parameter with name \paramref aName
    277   */
    278  static bool Extract(const nsACString& aInput, const nsACString& aName,
    279                      nsACString& aValue);
    280 
    281  /**
    282   * \brief Discards the state of this instance, then parses a new query string.
    283   *
    284   * \param aInput the query string to parse
    285   */
    286  void ParseInput(const nsACString& aInput);
    287 
    288  /**
    289   * Writes the current state out as a query string.
    290   *
    291   * \param[out] aValue receives the result of the serialization
    292   * \param aEncode when true, the serialization encodes the string.
    293   */
    294  void Serialize(nsACString& aValue, bool aEncode) const;
    295 
    296  static void SerializeString(const nsACString& aInput, nsACString& aValue);
    297  void Get(const nsACString& aName, nsACString& aRetval);
    298 
    299  void GetAll(const nsACString& aName, nsTArray<nsCString>& aRetval);
    300 
    301  /**
    302   * \brief Assigns the value of a given parameter.
    303   *
    304   * When one or more parameters of that name exist, the first one gets the new
    305   * value and every further parameter of the name is deleted. Otherwise,
    306   * this behaves the same as \ref Append.
    307   */
    308  void Set(const nsACString& aName, const nsACString& aValue);
    309 
    310  void Append(const nsACString& aName, const nsACString& aValue);
    311 
    312  bool Has(const nsACString& aName);
    313 
    314  bool Has(const nsACString& aName, const nsACString& aValue);
    315 
    316  /**
    317   * \brief Removes every parameter that has the given name.
    318   */
    319  void Delete(const nsACString& aName);
    320 
    321  void Delete(const nsACString& aName, const nsACString& aValue);
    322 
    323  void DeleteAll() { mParams.Clear(); }  // clears the whole parameter list
    324 
    325  uint32_t Length() const { return mParams.Length(); }  // number of entries
    326 
    327  static void DecodeString(const nsACString& aInput, nsACString& aOutput);
    328  const nsACString& GetKeyAtIndex(uint32_t aIndex) const {
    329    MOZ_ASSERT(aIndex < Length());  // aIndex must address an existing entry
    330    return mParams[aIndex].mKey;
    331  }
    332 
    333  const nsACString& GetValueAtIndex(uint32_t aIndex) const {
    334    MOZ_ASSERT(aIndex < Length());  // aIndex must address an existing entry
    335    return mParams[aIndex].mValue;
    336  }
    337 
    338  /**
    339   * \brief Sorts the parameters with a stable sort, so that multiple
    340   * parameters with the same name keep their relative order.
    341   */
    342  void Sort();
    343 
    344 private:
    345  static bool ParseNextInternal(const char*& aStart, const char* aEnd,
    346                                bool aShouldDecode, nsACString* aOutputName,
    347                                nsACString* aOutputValue);
    348 
    349  struct Param {  // one entry of mParams
    350    nsCString mKey;
    351    nsCString mValue;
    352  };
    353 
    354  nsTArray<Param> mParams;  // the ordered list of parameters
    355 };
    356 }  // namespace mozilla
    357 
    358 #endif  // !nsURLHelper_h__