tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

prepare (1290B)


      1 #!/bin/bash
      2 # Check common misspellings
      3 # input file format:
      4 # word->word1, ...
      5 # Source: http://en.wikipedia.org/wiki/Wikipedia:Lists_of_common_misspellings/For_machines
      6 
      7 hunspell=../../src/tools/hunspell
      8 hlang=${HUNSPELL:-en_US}
      9 alang=${ASPELL:-en_US}
     10 input=${INPUT:-List_of_common_misspellings.txt}
     11 
     12 # remove bad words recognised by Hunspell as good
     13 cat $input | sed 's/[-]>/	/' | $hunspell -d $hlang -1 -L |
     14 
     15 # remove items with dash for Aspell
     16 grep '^[^-]*	' |
     17 
     18 # remove spaces from end of lines
     19 sed 's/ *$//' >$input.1
     20 
     21 # remove bad words recognised by Aspell as good
     22 cut -f 1 -d '	' $input.1 | aspell -l $alang --list |
     23 awk 'FILENAME=="-"{a[$1]=1;next}a[$1]{print$0}' - $input.1 |
     24 
     25 # change commas with tabs
     26 sed 's/, */	/g' >$input.2
     27 
     28 # remove lines with unrecognised suggestions (except suggestion with spaces)
     29 cut -d '	' -f 2- $input.2 | tr "\t" "\n" | grep -v ' ' >x.1
     30 cat x.1 | $hunspell -l -d $hlang >x.2
     31 cat x.1 | aspell -l $alang --list >>x.2
     32 cat x.2 | awk 'BEGIN{FS="\t"}
     33 FILENAME=="-"{a[$1]=1;next}a[$2]!=1 && a[$3]!=1{print $0}' - $input.2 >$input.3
     34 
     35 cut -f 1 -d '	' $input.3 | aspell -l $alang -a | grep -v ^$ | sed -n '2,$p' |
     36 sed 's/^.*: //;s/, /	/g' >$input.4
     37 
     38 cat $input.3 | $hunspell -d $hlang -a -1 | grep -v ^$ | sed -n '2,$p' |
     39 sed 's/^.*: //;s/, /	/g' >$input.5
     40