tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

mozStorageSQLFunctions.cpp (12762B)


      1 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*-
      2 * vim: sw=2 ts=2 et lcs=trail\:.,tab\:>~ :
      3 * This Source Code Form is subject to the terms of the Mozilla Public
      4 * License, v. 2.0. If a copy of the MPL was not distributed with this
      5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
      6 
      7 #include "mozStorageSQLFunctions.h"
      8 #include "nsTArray.h"
      9 #include "nsUnicharUtils.h"
     10 #include <algorithm>
     11 #include "sqlite3.h"
     12 
     13 namespace mozilla {
     14 namespace storage {
     15 
     16 ////////////////////////////////////////////////////////////////////////////////
     17 //// Local Helper Functions
     18 
     19 namespace {
     20 
     21 /**
     22 * Performs the LIKE comparison of a string against a pattern.  For more detail
     23 * see http://www.sqlite.org/lang_expr.html#like.
     24 *
     25 * @param aPatternItr
     26 *        An iterator at the start of the pattern to check for.
     27 * @param aPatternEnd
     28 *        An iterator at the end of the pattern to check for.
     29 * @param aStringItr
     30 *        An iterator at the start of the string to check for the pattern.
     31 * @param aStringEnd
     32 *        An iterator at the end of the string to check for the pattern.
     33 * @param aEscapeChar
     34 *        The character to use for escaping symbols in the pattern.
     35 * @return 1 if the pattern is found, 0 otherwise.
     36 */
     37 int likeCompare(nsAString::const_iterator aPatternItr,
     38                nsAString::const_iterator aPatternEnd,
     39                nsAString::const_iterator aStringItr,
     40                nsAString::const_iterator aStringEnd, char16_t aEscapeChar) {
     41  const char16_t MATCH_ALL('%');
     42  const char16_t MATCH_ONE('_');
     43 
     44  bool lastWasEscape = false;
     45  while (aPatternItr != aPatternEnd) {
     46    /**
     47     * What we do in here is take a look at each character from the input
     48     * pattern, and do something with it.  There are 4 possibilities:
     49     * 1) character is an un-escaped match-all character
     50     * 2) character is an un-escaped match-one character
     51     * 3) character is an un-escaped escape character
     52     * 4) character is not any of the above
     53     */
     54    if (!lastWasEscape && *aPatternItr == MATCH_ALL) {
     55      // CASE 1
     56      /**
     57       * Now we need to skip any MATCH_ALL or MATCH_ONE characters that follow a
     58       * MATCH_ALL character.  For each MATCH_ONE character, skip one character
     59       * in the pattern string.
     60       */
     61      while (*aPatternItr == MATCH_ALL || *aPatternItr == MATCH_ONE) {
     62        if (*aPatternItr == MATCH_ONE) {
     63          // If we've hit the end of the string we are testing, no match
     64          if (aStringItr == aStringEnd) return 0;
     65          aStringItr++;
     66        }
     67        aPatternItr++;
     68      }
     69 
     70      // If we've hit the end of the pattern string, match
     71      if (aPatternItr == aPatternEnd) return 1;
     72 
     73      while (aStringItr != aStringEnd) {
     74        if (likeCompare(aPatternItr, aPatternEnd, aStringItr, aStringEnd,
     75                        aEscapeChar)) {
     76          // we've hit a match, so indicate this
     77          return 1;
     78        }
     79        aStringItr++;
     80      }
     81 
     82      // No match
     83      return 0;
     84    }
     85    if (!lastWasEscape && *aPatternItr == MATCH_ONE) {
     86      // CASE 2
     87      if (aStringItr == aStringEnd) {
     88        // If we've hit the end of the string we are testing, no match
     89        return 0;
     90      }
     91      aStringItr++;
     92      lastWasEscape = false;
     93    } else if (!lastWasEscape && *aPatternItr == aEscapeChar) {
     94      // CASE 3
     95      lastWasEscape = true;
     96    } else {
     97      // CASE 4
     98      if (::ToUpperCase(*aStringItr) != ::ToUpperCase(*aPatternItr)) {
     99        // If we've hit a point where the strings don't match, there is no match
    100        return 0;
    101      }
    102      aStringItr++;
    103      lastWasEscape = false;
    104    }
    105 
    106    aPatternItr++;
    107  }
    108 
    109  return aStringItr == aStringEnd;
    110 }
    111 
    112 /**
    113 * Compute the Levenshtein Edit Distance between two strings.
    114 *
    115 * @param aStringS
    116 *        a string
    117 * @param aStringT
    118 *        another string
    119 * @param _result
    120 *        an outparam that will receive the edit distance between the arguments
    121 * @return a Sqlite result code, e.g. SQLITE_OK, SQLITE_NOMEM, etc.
    122 */
    123 int levenshteinDistance(const nsAString& aStringS, const nsAString& aStringT,
    124                        int* _result) {
    125  // Set the result to a non-sensical value in case we encounter an error.
    126  *_result = -1;
    127 
    128  const uint32_t sLen = aStringS.Length();
    129  const uint32_t tLen = aStringT.Length();
    130 
    131  if (sLen == 0) {
    132    *_result = tLen;
    133    return SQLITE_OK;
    134  }
    135  if (tLen == 0) {
    136    *_result = sLen;
    137    return SQLITE_OK;
    138  }
    139 
    140  // Notionally, Levenshtein Distance is computed in a matrix.  If we
    141  // assume s = "span" and t = "spam", the matrix would look like this:
    142  //    s -->
    143  //  t          s   p   a   n
    144  //  |      0   1   2   3   4
    145  //  V  s   1   *   *   *   *
    146  //     p   2   *   *   *   *
    147  //     a   3   *   *   *   *
    148  //     m   4   *   *   *   *
    149  //
    150  // Note that the row width is sLen + 1 and the column height is tLen + 1,
    151  // where sLen is the length of the string "s" and tLen is the length of "t".
    152  // The first row and the first column are initialized as shown, and
    153  // the algorithm computes the remaining cells row-by-row, and
    154  // left-to-right within each row.  The computation only requires that
    155  // we be able to see the current row and the previous one.
    156 
    157  // Allocate memory for two rows.
    158  AutoTArray<int, nsAutoString::kStorageSize> row1;
    159  AutoTArray<int, nsAutoString::kStorageSize> row2;
    160 
    161  // Declare the raw pointers that will actually be used to access the memory.
    162  int* prevRow = row1.AppendElements(sLen + 1);
    163  int* currRow = row2.AppendElements(sLen + 1);
    164 
    165  // Initialize the first row.
    166  for (uint32_t i = 0; i <= sLen; i++) prevRow[i] = i;
    167 
    168  const char16_t* s = aStringS.BeginReading();
    169  const char16_t* t = aStringT.BeginReading();
    170 
    171  // Compute the empty cells in the "matrix" row-by-row, starting with
    172  // the second row.
    173  for (uint32_t ti = 1; ti <= tLen; ti++) {
    174    // Initialize the first cell in this row.
    175    currRow[0] = ti;
    176 
    177    // Get the character from "t" that corresponds to this row.
    178    const char16_t tch = t[ti - 1];
    179 
    180    // Compute the remaining cells in this row, left-to-right,
    181    // starting at the second column (and first character of "s").
    182    for (uint32_t si = 1; si <= sLen; si++) {
    183      // Get the character from "s" that corresponds to this column,
    184      // compare it to the t-character, and compute the "cost".
    185      const char16_t sch = s[si - 1];
    186      int cost = (sch == tch) ? 0 : 1;
    187 
    188      // ............ We want to calculate the value of cell "d" from
    189      // ...ab....... the previously calculated (or initialized) cells
    190      // ...cd....... "a", "b", and "c", where d = min(a', b', c').
    191      // ............
    192      int aPrime = prevRow[si - 1] + cost;
    193      int bPrime = prevRow[si] + 1;
    194      int cPrime = currRow[si - 1] + 1;
    195      currRow[si] = std::min(aPrime, std::min(bPrime, cPrime));
    196    }
    197 
    198    // Advance to the next row.  The current row becomes the previous
    199    // row and we recycle the old previous row as the new current row.
    200    // We don't need to re-initialize the new current row since we will
    201    // rewrite all of its cells anyway.
    202    int* oldPrevRow = prevRow;
    203    prevRow = currRow;
    204    currRow = oldPrevRow;
    205  }
    206 
    207  // The final result is the value of the last cell in the last row.
    208  // Note that that's now in the "previous" row, since we just swapped them.
    209  *_result = prevRow[sLen];
    210  return SQLITE_OK;
    211 }
    212 
    213 // This struct is used only by registerFunctions below, but ISO C++98 forbids
    214 // instantiating a template dependent on a locally-defined type.  Boo-urns!
    215 struct Functions {
    216  const char* zName;
    217  int nArg;
    218  int enc;
    219  void* pContext;
    220  void (*xFunc)(::sqlite3_context*, int, sqlite3_value**);
    221 };
    222 
    223 }  // namespace
    224 
    225 ////////////////////////////////////////////////////////////////////////////////
    226 //// Exposed Functions
    227 
    228 int registerFunctions(sqlite3* aDB) {
    229  Functions functions[] = {
    230      {"lower", 1, SQLITE_UTF16, 0, caseFunction},
    231      {"lower", 1, SQLITE_UTF8, 0, caseFunction},
    232      {"upper", 1, SQLITE_UTF16, (void*)1, caseFunction},
    233      {"upper", 1, SQLITE_UTF8, (void*)1, caseFunction},
    234 
    235      {"like", 2, SQLITE_UTF16, 0, likeFunction},
    236      {"like", 2, SQLITE_UTF8, 0, likeFunction},
    237      {"like", 3, SQLITE_UTF16, 0, likeFunction},
    238      {"like", 3, SQLITE_UTF8, 0, likeFunction},
    239 
    240      {"levenshteinDistance", 2, SQLITE_UTF16, 0, levenshteinDistanceFunction},
    241      {"levenshteinDistance", 2, SQLITE_UTF8, 0, levenshteinDistanceFunction},
    242 
    243      {"utf16Length", 1, SQLITE_UTF16, 0, utf16LengthFunction},
    244      {"utf16Length", 1, SQLITE_UTF8, 0, utf16LengthFunction},
    245  };
    246 
    247  int rv = SQLITE_OK;
    248  for (size_t i = 0; SQLITE_OK == rv && i < std::size(functions); ++i) {
    249    struct Functions* p = &functions[i];
    250    rv = ::sqlite3_create_function(aDB, p->zName, p->nArg, p->enc, p->pContext,
    251                                   p->xFunc, nullptr, nullptr);
    252  }
    253 
    254  return rv;
    255 }
    256 
    257 ////////////////////////////////////////////////////////////////////////////////
    258 //// SQL Functions
    259 
    260 void caseFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
    261  NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
    262 
    263  const char16_t* value =
    264      static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
    265  nsAutoString data(value,
    266                    ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t));
    267  bool toUpper = ::sqlite3_user_data(aCtx) ? true : false;
    268 
    269  if (toUpper)
    270    ::ToUpperCase(data);
    271  else
    272    ::ToLowerCase(data);
    273 
    274  // Set the result.
    275  ::sqlite3_result_text16(aCtx, data.get(), data.Length() * sizeof(char16_t),
    276                          SQLITE_TRANSIENT);
    277 }
    278 
    279 /**
    280 * This implements the like() SQL function.  This is used by the LIKE operator.
    281 * The SQL statement 'A LIKE B' is implemented as 'like(B, A)', and if there is
    282 * an escape character, say E, it is implemented as 'like(B, A, E)'.
    283 */
    284 void likeFunction(sqlite3_context* aCtx, int aArgc, sqlite3_value** aArgv) {
    285  NS_ASSERTION(2 == aArgc || 3 == aArgc, "Invalid number of arguments!");
    286 
    287  if (::sqlite3_value_bytes(aArgv[0]) >
    288      ::sqlite3_limit(::sqlite3_context_db_handle(aCtx),
    289                      SQLITE_LIMIT_LIKE_PATTERN_LENGTH, -1)) {
    290    ::sqlite3_result_error(aCtx, "LIKE or GLOB pattern too complex",
    291                           SQLITE_TOOBIG);
    292    return;
    293  }
    294 
    295  if (!::sqlite3_value_text16(aArgv[0]) || !::sqlite3_value_text16(aArgv[1]))
    296    return;
    297 
    298  const char16_t* a =
    299      static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
    300  int aLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
    301  nsDependentString A(a, aLen);
    302 
    303  const char16_t* b =
    304      static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
    305  int bLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
    306  nsDependentString B(b, bLen);
    307  NS_ASSERTION(!B.IsEmpty(), "LIKE string must not be null!");
    308 
    309  char16_t E = 0;
    310  if (3 == aArgc)
    311    E = static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[2]))[0];
    312 
    313  nsAString::const_iterator itrString, endString;
    314  A.BeginReading(itrString);
    315  A.EndReading(endString);
    316  nsAString::const_iterator itrPattern, endPattern;
    317  B.BeginReading(itrPattern);
    318  B.EndReading(endPattern);
    319  ::sqlite3_result_int(
    320      aCtx, likeCompare(itrPattern, endPattern, itrString, endString, E));
    321 }
    322 
    323 void levenshteinDistanceFunction(sqlite3_context* aCtx, int aArgc,
    324                                 sqlite3_value** aArgv) {
    325  NS_ASSERTION(2 == aArgc, "Invalid number of arguments!");
    326 
    327  // If either argument is a SQL NULL, then return SQL NULL.
    328  if (::sqlite3_value_type(aArgv[0]) == SQLITE_NULL ||
    329      ::sqlite3_value_type(aArgv[1]) == SQLITE_NULL) {
    330    ::sqlite3_result_null(aCtx);
    331    return;
    332  }
    333 
    334  const char16_t* a =
    335      static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[0]));
    336  int aLen = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
    337 
    338  const char16_t* b =
    339      static_cast<const char16_t*>(::sqlite3_value_text16(aArgv[1]));
    340  int bLen = ::sqlite3_value_bytes16(aArgv[1]) / sizeof(char16_t);
    341 
    342  // Compute the Levenshtein Distance, and return the result (or error).
    343  int distance = -1;
    344  const nsDependentString A(a, aLen);
    345  const nsDependentString B(b, bLen);
    346  int status = levenshteinDistance(A, B, &distance);
    347  if (status == SQLITE_OK) {
    348    ::sqlite3_result_int(aCtx, distance);
    349  } else if (status == SQLITE_NOMEM) {
    350    ::sqlite3_result_error_nomem(aCtx);
    351  } else {
    352    ::sqlite3_result_error(aCtx, "User function returned error code", -1);
    353  }
    354 }
    355 
    356 void utf16LengthFunction(sqlite3_context* aCtx, int aArgc,
    357                         sqlite3_value** aArgv) {
    358  NS_ASSERTION(1 == aArgc, "Invalid number of arguments!");
    359 
    360  int len = ::sqlite3_value_bytes16(aArgv[0]) / sizeof(char16_t);
    361 
    362  // Set the result.
    363  ::sqlite3_result_int(aCtx, len);
    364 }
    365 
    366 }  // namespace storage
    367 }  // namespace mozilla