tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

hunspell_csutil.cxx (6387B)


      1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
      2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
      3 /* ***** BEGIN LICENSE BLOCK *****
      4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
      5 *
      6 * Copyright (C) 2002-2017 Németh László
      7 *
      8 * The contents of this file are subject to the Mozilla Public License Version
      9 * 1.1 (the "License"); you may not use this file except in compliance with
     10 * the License. You may obtain a copy of the License at
     11 * http://www.mozilla.org/MPL/
     12 *
     13 * Software distributed under the License is distributed on an "AS IS" basis,
     14 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
     15 * for the specific language governing rights and limitations under the
     16 * License.
     17 *
     18 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks.
     19 *
     20 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
     21 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
     22 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
     23 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
     24 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
     25 *
     26 * Alternatively, the contents of this file may be used under the terms of
     27 * either the GNU General Public License Version 2 or later (the "GPL"), or
     28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
     29 * in which case the provisions of the GPL or the LGPL are applicable instead
     30 * of those above. If you wish to allow use of your version of this file only
     31 * under the terms of either the GPL or the LGPL, and not to allow others to
     32 * use your version of this file under the terms of the MPL, indicate your
     33 * decision by deleting the provisions above and replace them with the notice
     34 * and other provisions required by the GPL or the LGPL. If you do not delete
     35 * the provisions above, a recipient may use your version of this file under
     36 * the terms of any one of the MPL, the GPL or the LGPL.
     37 *
     38 * ***** END LICENSE BLOCK ***** */
     39 /*
     40 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
     41 * And Contributors.  All rights reserved.
     42 *
     43 * Redistribution and use in source and binary forms, with or without
     44 * modification, are permitted provided that the following conditions
     45 * are met:
     46 *
     47 * 1. Redistributions of source code must retain the above copyright
     48 *    notice, this list of conditions and the following disclaimer.
     49 *
     50 * 2. Redistributions in binary form must reproduce the above copyright
     51 *    notice, this list of conditions and the following disclaimer in the
     52 *    documentation and/or other materials provided with the distribution.
     53 *
     54 * 3. All modifications to the source code must be clearly marked as
     55 *    such.  Binary redistributions based on modified source code
     56 *    must be clearly marked as modified versions in the documentation
     57 *    and/or other materials provided with the distribution.
     58 *
     59 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
     60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     61 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
     62 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL
     63 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
     64 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
     65 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     70 * SUCH DAMAGE.
     71 */
     72 #include "hunspell_csutil.hxx"
     73 #include "mozilla/Encoding.h"
     74 #include "mozilla/Span.h"
     75 #include "nsUnicharUtils.h"
     76 
     77 /* This is a copy of get_current_cs from the hunspell csutil.cxx file.
     78 */
     79 struct cs_info* hunspell_get_current_cs(const std::string& es) {
     80  struct cs_info* ccs = new cs_info[256];
     81  // Initialze the array with dummy data so that we wouldn't need
     82  // to return null in case of failures.
     83  for (int i = 0; i <= 0xff; ++i) {
     84    ccs[i].ccase = false;
     85    ccs[i].clower = i;
     86    ccs[i].cupper = i;
     87  }
     88 
     89  auto encoding = mozilla::Encoding::ForLabelNoReplacement(es);
     90  if (!encoding) {
     91    return ccs;
     92  }
     93  auto encoder = encoding->NewEncoder();
     94  auto decoder = encoding->NewDecoderWithoutBOMHandling();
     95 
     96  for (unsigned int i = 0; i <= 0xff; ++i) {
     97    bool success = false;
     98    // We want to find the upper/lowercase equivalents of each byte
     99    // in this 1-byte character encoding.  Call our encoding/decoding
    100    // APIs separately for each byte since they may reject some of the
    101    // bytes, and we want to handle errors separately for each byte.
    102    uint8_t lower, upper;
    103    do {
    104      if (i == 0) break;
    105      uint8_t source = uint8_t(i);
    106      char16_t uni[2];
    107      char16_t uniCased;
    108      uint8_t destination[4];
    109      auto src1 = mozilla::Span(&source, 1);
    110      auto dst1 = mozilla::Span(uni);
    111      auto src2 = mozilla::Span(&uniCased, 1);
    112      auto dst2 = mozilla::Span(destination);
    113 
    114      uint32_t result;
    115      size_t read;
    116      size_t written;
    117      std::tie(result, read, written) =
    118          decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true);
    119      if (result != mozilla::kInputEmpty || read != 1 || written != 1) {
    120        break;
    121      }
    122 
    123      uniCased = ToLowerCase(uni[0]);
    124      std::tie(result, read, written) =
    125          encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
    126      if (result != mozilla::kInputEmpty || read != 1 || written != 1) {
    127        break;
    128      }
    129      lower = destination[0];
    130 
    131      uniCased = ToUpperCase(uni[0]);
    132      std::tie(result, read, written) =
    133          encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true);
    134      if (result != mozilla::kInputEmpty || read != 1 || written != 1) {
    135        break;
    136      }
    137      upper = destination[0];
    138 
    139      success = true;
    140    } while (0);
    141 
    142    encoding->NewEncoderInto(*encoder);
    143    encoding->NewDecoderWithoutBOMHandlingInto(*decoder);
    144 
    145    if (success) {
    146      ccs[i].cupper = upper;
    147      ccs[i].clower = lower;
    148    } else {
    149      ccs[i].cupper = i;
    150      ccs[i].clower = i;
    151    }
    152 
    153    if (ccs[i].clower != (unsigned char)i)
    154      ccs[i].ccase = true;
    155    else
    156      ccs[i].ccase = false;
    157  }
    158 
    159  return ccs;
    160 }