hunspell_csutil.cxx (6387B)
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */ 3 /* ***** BEGIN LICENSE BLOCK ***** 4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1 5 * 6 * Copyright (C) 2002-2017 Németh László 7 * 8 * The contents of this file are subject to the Mozilla Public License Version 9 * 1.1 (the "License"); you may not use this file except in compliance with 10 * the License. You may obtain a copy of the License at 11 * http://www.mozilla.org/MPL/ 12 * 13 * Software distributed under the License is distributed on an "AS IS" basis, 14 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License 15 * for the specific language governing rights and limitations under the 16 * License. 17 * 18 * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. 19 * 20 * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, 21 * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, 22 * Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter, 23 * Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls, 24 * Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen 25 * 26 * Alternatively, the contents of this file may be used under the terms of 27 * either the GNU General Public License Version 2 or later (the "GPL"), or 28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), 29 * in which case the provisions of the GPL or the LGPL are applicable instead 30 * of those above. If you wish to allow use of your version of this file only 31 * under the terms of either the GPL or the LGPL, and not to allow others to 32 * use your version of this file under the terms of the MPL, indicate your 33 * decision by deleting the provisions above and replace them with the notice 34 * and other provisions required by the GPL or the LGPL. If you do not delete 35 * the provisions above, a recipient may use your version of this file under 36 * the terms of any one of the MPL, the GPL or the LGPL. 37 * 38 * ***** END LICENSE BLOCK ***** */ 39 /* 40 * Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada 41 * And Contributors. All rights reserved. 42 * 43 * Redistribution and use in source and binary forms, with or without 44 * modification, are permitted provided that the following conditions 45 * are met: 46 * 47 * 1. Redistributions of source code must retain the above copyright 48 * notice, this list of conditions and the following disclaimer. 49 * 50 * 2. Redistributions in binary form must reproduce the above copyright 51 * notice, this list of conditions and the following disclaimer in the 52 * documentation and/or other materials provided with the distribution. 53 * 54 * 3. All modifications to the source code must be clearly marked as 55 * such. Binary redistributions based on modified source code 56 * must be clearly marked as modified versions in the documentation 57 * and/or other materials provided with the distribution. 58 * 59 * THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS 60 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 61 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 62 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL 63 * KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 64 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 65 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 68 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 69 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 70 * SUCH DAMAGE. 71 */ 72 #include "hunspell_csutil.hxx" 73 #include "mozilla/Encoding.h" 74 #include "mozilla/Span.h" 75 #include "nsUnicharUtils.h" 76 77 /* This is a copy of get_current_cs from the hunspell csutil.cxx file. 78 */ 79 struct cs_info* hunspell_get_current_cs(const std::string& es) { 80 struct cs_info* ccs = new cs_info[256]; 81 // Initialze the array with dummy data so that we wouldn't need 82 // to return null in case of failures. 83 for (int i = 0; i <= 0xff; ++i) { 84 ccs[i].ccase = false; 85 ccs[i].clower = i; 86 ccs[i].cupper = i; 87 } 88 89 auto encoding = mozilla::Encoding::ForLabelNoReplacement(es); 90 if (!encoding) { 91 return ccs; 92 } 93 auto encoder = encoding->NewEncoder(); 94 auto decoder = encoding->NewDecoderWithoutBOMHandling(); 95 96 for (unsigned int i = 0; i <= 0xff; ++i) { 97 bool success = false; 98 // We want to find the upper/lowercase equivalents of each byte 99 // in this 1-byte character encoding. Call our encoding/decoding 100 // APIs separately for each byte since they may reject some of the 101 // bytes, and we want to handle errors separately for each byte. 102 uint8_t lower, upper; 103 do { 104 if (i == 0) break; 105 uint8_t source = uint8_t(i); 106 char16_t uni[2]; 107 char16_t uniCased; 108 uint8_t destination[4]; 109 auto src1 = mozilla::Span(&source, 1); 110 auto dst1 = mozilla::Span(uni); 111 auto src2 = mozilla::Span(&uniCased, 1); 112 auto dst2 = mozilla::Span(destination); 113 114 uint32_t result; 115 size_t read; 116 size_t written; 117 std::tie(result, read, written) = 118 decoder->DecodeToUTF16WithoutReplacement(src1, dst1, true); 119 if (result != mozilla::kInputEmpty || read != 1 || written != 1) { 120 break; 121 } 122 123 uniCased = ToLowerCase(uni[0]); 124 std::tie(result, read, written) = 125 encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); 126 if (result != mozilla::kInputEmpty || read != 1 || written != 1) { 127 break; 128 } 129 lower = destination[0]; 130 131 uniCased = ToUpperCase(uni[0]); 132 std::tie(result, read, written) = 133 encoder->EncodeFromUTF16WithoutReplacement(src2, dst2, true); 134 if (result != mozilla::kInputEmpty || read != 1 || written != 1) { 135 break; 136 } 137 upper = destination[0]; 138 139 success = true; 140 } while (0); 141 142 encoding->NewEncoderInto(*encoder); 143 encoding->NewDecoderWithoutBOMHandlingInto(*decoder); 144 145 if (success) { 146 ccs[i].cupper = upper; 147 ccs[i].clower = lower; 148 } else { 149 ccs[i].cupper = i; 150 ccs[i].clower = i; 151 } 152 153 if (ccs[i].clower != (unsigned char)i) 154 ccs[i].ccase = true; 155 else 156 ccs[i].ccase = false; 157 } 158 159 return ccs; 160 }