edit-dictionary.sh (2768B)
#! /usr/bin/env sh

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# Interactively add words to the en-US dictionary.
#
# The script opens $EDITOR on a scratch word list, expands ../en-US.dic with
# ../en-US.aff into a flat word list, appends the new words, munches the
# result back into affix-compressed form and writes ../en-US.dic
# (ISO-8859-1) plus utf8/en-US-utf8.dic (UTF-8).
#
# Requirements, as used below:
#   - run from the directory that contains the 'scowl' and 'utf8' folders
#   - ../en-US.dic and ../en-US.aff exist
#   - $EDITOR is set
#
# Only POSIX sh features are used so the script behaves the same whether
# 'sh' is bash, dash or another shell.

set -eu

WKDIR="$(pwd)"
SPELLER="$WKDIR/scowl/speller"

# Compress the word list read from stdin with the affix file given as $1.
# Output is sorted and de-duplicated.
munch() {
  "$SPELLER/munch-list" munch "$1" | sort -u
}

# Expand the dictionary entries read from stdin with the affix file given as
# $1. Lines made only of digits are dropped first; output is sorted and
# de-duplicated.
expand() {
  grep -v '^[0-9][0-9]*$' | "$SPELLER/munch-list" expand "$1" | sort -u
}

if [ ! -d "$SPELLER" ]; then
  echo "The 'scowl' folder is missing. Check the documentation at"
  echo "https://firefox-source-docs.mozilla.org/extensions/spellcheck/index.html"
  exit 1
fi

if [ -z "${EDITOR:-}" ]; then
  echo 'Need to set the $EDITOR environment variable to your favorite editor.'
  exit 1
fi

# Open the editor and allow the user to type or paste words
echo "Editor is going to open, you can add the list of words. Quit the editor to finish editing."
echo "Press Enter to begin."
read -r _reply
# $EDITOR is deliberately left unquoted so that values carrying arguments
# (for example "code -w") are split into command and options.
# shellcheck disable=SC2086
$EDITOR temp-list.txt

if [ ! -f temp-list.txt ]; then
  echo "The content of the editor hasn't been saved."
  exit 1
fi

# Remove empty lines. 'sed -i' takes different arguments on GNU and BSD sed,
# so filter through a scratch file instead of editing in place.
sed '/^$/d' temp-list.txt > temp-list.clean
mv temp-list.clean temp-list.txt

# Copy the current en-US dictionary and strip the first line that contains
# the count.
tail -n +2 ../en-US.dic > en-US.stripped

# Convert the file to UTF-8
iconv -f iso-8859-1 -t utf-8 en-US.stripped > en-US.utf8
rm en-US.stripped

# Save to a temporary file words excluded from suggestions, and numerals,
# since the munched result is different for both.
grep '!$' < utf8/en-US-utf8.dic > en-US-nosug.txt
grep '^[0-9][a-z/]' < utf8/en-US-utf8.dic > en-US-numerals.txt

# Expand the dictionary to a word list
expand ../en-US.aff < en-US.utf8 > en-US-wordlist.txt
rm en-US.utf8

# Add the new words
cat temp-list.txt >> en-US-wordlist.txt
rm temp-list.txt

# Remove numerals from the expanded wordlist
grep -v '^[0-9]' < en-US-wordlist.txt > en-US-wordlist-nonum.txt
rm en-US-wordlist.txt

# Run the wordlist through the munch script, to compress the dictionary where
# possible (using affix rules).
munch ../en-US.aff < en-US-wordlist-nonum.txt > en-US-munched.dic
rm en-US-wordlist-nonum.txt

# Remove words that should not be suggested.
# Every entry whose word part (the text before the first '/') also appears as
# the word part of a line in en-US-nosug.txt is dropped. The comparison is a
# literal string match done in a single pass, so words containing regex
# metacharacters cannot over-match. The exclusion list is loaded in BEGIN so
# that an empty list simply removes nothing.
awk -F/ -v nosug_file=en-US-nosug.txt '
  BEGIN {
    while ((getline entry < nosug_file) > 0) {
      split(entry, fields, "/")
      excluded[fields[1]] = 1
    }
    close(nosug_file)
  }
  !($1 in excluded)
' en-US-munched.dic > en-US-munched.filtered
mv en-US-munched.filtered en-US-munched.dic

# Add back suggestion exclusions and numerals from the original .dic file
cat en-US-nosug.txt >> en-US-munched.dic
cat en-US-numerals.txt >> en-US-munched.dic
rm en-US-nosug.txt
rm en-US-numerals.txt

# Add back the line count and sort the lines
# (tr strips the padding some wc implementations put around the number;
# LC_ALL=C makes the sort byte-wise and independent of the user's locale)
wc -l < en-US-munched.dic | tr -d '[:blank:]' > en-US.dic
LC_ALL=C sort en-US-munched.dic >> en-US.dic
rm -f en-US-munched.dic

# Convert back to ISO-8859-1
iconv -f utf-8 -t iso-8859-1 en-US.dic > ../en-US.dic

# Keep a copy of the UTF-8 file in /utf8
mv en-US.dic utf8/en-US-utf8.dic