tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

afblue.pl (13048B)


      1 #! /usr/bin/perl -w
      2 # -*- Perl -*-
      3 #
      4 # afblue.pl
      5 #
      6 # Process a blue zone character data file.
      7 #
      8 # Copyright (C) 2013-2025 by
      9 # David Turner, Robert Wilhelm, and Werner Lemberg.
     10 #
     11 # This file is part of the FreeType project, and may only be used,
     12 # modified, and distributed under the terms of the FreeType project
     13 # license, LICENSE.TXT.  By continuing to use, modify, or distribute
     14 # this file you indicate that you have read the license and
     15 # understand and accept it fully.
     16 
     17 use strict;
     18 use warnings;
     19 use English '-no_match_vars';
     20 use open ':std', ':encoding(UTF-8)';
     21 
     22 
     23 my $prog = $PROGRAM_NAME;
     24 $prog =~ s| .* / ||x;      # Remove path.
     25 
     26 die "usage: $prog datafile < infile > outfile\n" if $#ARGV != 0;
     27 
     28 
     29 my $datafile = $ARGV[0];
     30 
     31 my %diversions;        # The extracted and massaged data from `datafile'.
     32 my @else_stack;        # Booleans to track else-clauses.
     33 my @name_stack;        # Stack of integers used for names of aux. variables.
     34 
     35 my $curr_enum;         # Name of the current enumeration.
     36 my $curr_array;        # Name of the current array.
     37 my $curr_max;          # Name of the current maximum value.
     38 
     39 my $curr_enum_element; # Name of the current enumeration element.
     40 my $curr_offset;       # The offset relative to current aux. variable.
     41 my $curr_elem_size;    # The number of non-space characters in the current string or
     42                       # the number of elements in the current block.
     43 
     44 my $have_sections = 0; # Boolean; set if start of a section has been seen.
     45 my $have_strings;      # Boolean; set if current section contains strings.
     46 my $have_blocks;       # Boolean; set if current section contains blocks.
     47 
     48 my $have_enum_element; # Boolean; set if we have an enumeration element.
     49 my $in_string;         # Boolean; set if a string has been parsed.
     50 
     51 my $num_sections = 0;  # Number of sections seen so far.
     52 
     53 my $last_aux;          # Name of last auxiliary variable.
     54 
     55 
     56 # Regular expressions.
     57 
     58 # [<ws>] <enum_name> <ws> <array_name> <ws> <max_name> [<ws>] ':' [<ws>] '\n'
     59 my $section_re = qr/ ^ \s* (\S+) \s+ (\S+) \s+ (\S+) \s* : \s* $ /x;
     60 
     61 # [<ws>] <enum_element_name> [<ws>] '\n'
     62 my $enum_element_re = qr/ ^ \s* ( [A-Za-z0-9_]+ ) \s* $ /x;
     63 
     64 # '#' <preprocessor directive> '\n'
     65 my $preprocessor_re = qr/ ^ \# /x;
     66 
     67 # [<ws>] '/' '/' <comment> '\n'
     68 my $comment_re = qr| ^ \s* // |x;
     69 
     70 # empty line
     71 my $whitespace_only_re = qr/ ^ \s* $ /x;
     72 
     73 # [<ws>] '"' <string> '"' [<ws>] '\n'  (<string> doesn't contain newlines)
     74 my $string_re = qr/ ^ \s*
     75                       " ( (?> (?: (?> [^"\\]+ ) | \\. )* ) ) "
     76                       \s* $ /x;
     77 
     78 # [<ws>] '{' <block> '}' [<ws>] '\n'  (<block> can contain newlines)
     79 my $block_start_re = qr/ ^ \s* \{ /x;
     80 
     81 # We need the capturing group for `split' to make it return the separator
     82 # tokens (i.e., the opening and closing brace) also.
     83 my $brace_re = qr/ ( [{}] ) /x;
     84 
     85 
     86 sub Warn
     87 {
     88  my $message = shift;
     89  warn "$datafile:$INPUT_LINE_NUMBER: warning: $message\n";
     90 }
     91 
     92 
     93 sub Die
     94 {
     95  my $message = shift;
     96  die "$datafile:$INPUT_LINE_NUMBER: error: $message\n";
     97 }
     98 
     99 
    100 my $warned_before = 0;
    101 
    102 sub warn_before
    103 {
    104  Warn("data before first section gets ignored") unless $warned_before;
    105  $warned_before = 1;
    106 }
    107 
    108 
    109 sub strip_newline
    110 {
    111  chomp;
    112  s/ \x0D $ //x;
    113 }
    114 
    115 
    116 sub end_curr_string
    117 {
    118  # Append final null byte to string.
    119  if ($have_strings)
    120  {
    121    push @{$diversions{$curr_array}}, "    '\\0',\n" if $in_string;
    122 
    123    $curr_offset++;
    124    $in_string = 0;
    125  }
    126 }
    127 
    128 
    129 sub update_max_elem_size
    130 {
    131  if ($curr_elem_size)
    132  {
    133    my $max = pop @{$diversions{$curr_max}};
    134    $max = $curr_elem_size if $curr_elem_size > $max;
    135    push @{$diversions{$curr_max}}, $max;
    136  }
    137 }
    138 
    139 
    140 sub convert_non_ascii_char
    141 {
    142  # A UTF-8 character outside of the printable ASCII range, with possibly a
    143  # leading backslash character.
    144  my $s = shift;
    145 
    146  # Here we count characters, not bytes.
    147  $curr_elem_size += length $s;
    148 
    149  utf8::encode($s);
    150  $s = uc unpack 'H*', $s;
    151 
    152  $curr_offset += $s =~ s/\G(..)/'\\x$1', /sg;
    153 
    154  return $s;
    155 }
    156 
    157 
    158 sub convert_ascii_chars
    159 {
    160  # A series of ASCII characters in the printable range.
    161  my $s = shift;
    162 
    163  # We reduce multiple space characters to a single one.
    164  $s =~ s/ +/ /g;
    165 
    166  # Count all non-space characters.  Note that `()' applies a list context
    167  # to the capture that is used to count the elements.
    168  $curr_elem_size += () = $s =~ /[^ ]/g;
    169 
    170  $curr_offset += $s =~ s/\G(.)/'$1', /g;
    171 
    172  return $s;
    173 }
    174 
    175 
    176 sub convert_literal
    177 {
    178  my $s = shift;
    179  my $orig = $s;
    180 
    181  # ASCII printables and space
    182  my $safe_re = '\x20-\x7E';
    183  # ASCII printables and space, no backslash
    184  my $safe_no_backslash_re = '\x20-\x5B\x5D-\x7E';
    185 
    186  $s =~ s{
    187           (?: \\? ( [^$safe_re] )
    188               | ( (?: [$safe_no_backslash_re]
    189                       | \\ [$safe_re] )+ ) )
    190         }
    191         {
    192           defined($1) ? convert_non_ascii_char($1)
    193                       : convert_ascii_chars($2)
    194         }egx;
    195 
    196   # We assume that `$orig' doesn't contain `*/'
    197   return $s . " /* $orig */";
    198 }
    199 
    200 
    201 sub aux_name
    202 {
    203  return "af_blue_" . $num_sections. "_" . join('_', @name_stack);
    204 }
    205 
    206 
    207 sub aux_name_next
    208 {
    209  $name_stack[$#name_stack]++;
    210  my $name = aux_name();
    211  $name_stack[$#name_stack]--;
    212 
    213  return $name;
    214 }
    215 
    216 
    217 sub enum_val_string
    218 {
    219  # Build string that holds code to save the current offset in an
    220  # enumeration element.
    221  my $aux = shift;
    222 
    223  my $add = ($last_aux eq "af_blue_" . $num_sections . "_0" )
    224              ? ""
    225              : "$last_aux + ";
    226 
    227  return "    $aux = $add$curr_offset,\n";
    228 }
    229 
    230 
    231 
    232 # Process data file.
    233 
    234 open(DATA, $datafile) || die "$prog: can't open \`$datafile': $OS_ERROR\n";
    235 
    236 while (<DATA>)
    237 {
    238  strip_newline();
    239 
    240  next if /$comment_re/;
    241  next if /$whitespace_only_re/;
    242 
    243  if (/$section_re/)
    244  {
    245    Warn("previous section is empty") if ($have_sections
    246                                          && !$have_strings
    247                                          && !$have_blocks);
    248 
    249    end_curr_string();
    250    update_max_elem_size();
    251 
    252    # Save captured groups from `section_re'.
    253    $curr_enum = $1;
    254    $curr_array = $2;
    255    $curr_max = $3;
    256 
    257    $curr_enum_element = "";
    258    $curr_offset = 0;
    259 
    260    Warn("overwriting already defined enumeration \`$curr_enum'")
    261      if exists($diversions{$curr_enum});
    262    Warn("overwriting already defined array \`$curr_array'")
    263      if exists($diversions{$curr_array});
    264    Warn("overwriting already defined maximum value \`$curr_max'")
    265      if exists($diversions{$curr_max});
    266 
    267    $diversions{$curr_enum} = [];
    268    $diversions{$curr_array} = [];
    269    $diversions{$curr_max} = [];
    270 
    271    push @{$diversions{$curr_max}}, 0;
    272 
    273    @name_stack = ();
    274    push @name_stack, 0;
    275 
    276    $have_sections = 1;
    277    $have_strings = 0;
    278    $have_blocks = 0;
    279 
    280    $have_enum_element = 0;
    281    $in_string = 0;
    282 
    283    $num_sections++;
    284    $curr_elem_size = 0;
    285 
    286    $last_aux = aux_name();
    287 
    288    next;
    289  }
    290 
    291  if (/$preprocessor_re/)
    292  {
    293    if ($have_sections)
    294    {
    295      # Having preprocessor conditionals complicates the computation of
    296      # correct offset values.  We have to introduce auxiliary enumeration
    297      # elements with the name `af_blue_<s>_<n1>_<n2>_...' that store
    298      # offsets to be used in conditional clauses.  `<s>' is the number of
    299      # sections seen so far, `<n1>' is the number of `#if' and `#endif'
    300      # conditionals seen so far in the topmost level, `<n2>' the number of
    301      # `#if' and `#endif' conditionals seen so far one level deeper, etc.
    302      # As a consequence, uneven values are used within a clause, and even
    303      # values after a clause, since the C standard doesn't allow the
    304      # redefinition of an enumeration value.  For example, the name
    305      # `af_blue_5_1_6' is used to construct enumeration values in the fifth
    306      # section after the third (second-level) if-clause within the first
    307      # (top-level) if-clause.  After the first top-level clause has
    308      # finished, `af_blue_5_2' is used.  The current offset is then
    309      # relative to the value stored in the current auxiliary element.
    310 
    311      if (/ ^ \# \s* if /x)
    312      {
    313        push @else_stack, 0;
    314 
    315        $name_stack[$#name_stack]++;
    316 
    317        push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
    318        $last_aux = aux_name();
    319 
    320        push @name_stack, 0;
    321 
    322        $curr_offset = 0;
    323      }
    324      elsif (/ ^ \# \s* elif /x)
    325      {
    326        Die("unbalanced #elif") unless @else_stack;
    327 
    328        pop @name_stack;
    329 
    330        push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
    331        $last_aux = aux_name();
    332 
    333        push @name_stack, 0;
    334 
    335        $curr_offset = 0;
    336      }
    337      elsif (/ ^ \# \s* else /x)
    338      {
    339        my $prev_else = pop @else_stack;
    340        Die("unbalanced #else") unless defined($prev_else);
    341        Die("#else already seen") if $prev_else;
    342        push @else_stack, 1;
    343 
    344        pop @name_stack;
    345 
    346        push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next());
    347        $last_aux = aux_name();
    348 
    349        push @name_stack, 0;
    350 
    351        $curr_offset = 0;
    352      }
    353      elsif (/ ^ (\# \s*) endif /x)
    354      {
    355        my $prev_else = pop @else_stack;
    356        Die("unbalanced #endif") unless defined($prev_else);
    357 
    358        pop @name_stack;
    359 
    360        # If there is no else-clause for an if-clause, we add one.  This is
    361        # necessary to have correct offsets.
    362        if (!$prev_else)
    363        {
    364          # Use amount of whitespace from `endif'.
    365          push @{$diversions{$curr_enum}}, enum_val_string(aux_name_next())
    366                                           . $1 . "else\n";
    367          $last_aux = aux_name();
    368 
    369          $curr_offset = 0;
    370        }
    371 
    372        $name_stack[$#name_stack]++;
    373 
    374        push @{$diversions{$curr_enum}}, enum_val_string(aux_name());
    375        $last_aux = aux_name();
    376 
    377        $curr_offset = 0;
    378      }
    379 
    380      # Handle (probably continued) preprocessor lines.
    381    CONTINUED_LOOP:
    382      {
    383        do
    384        {
    385          strip_newline();
    386 
    387          push @{$diversions{$curr_enum}}, $ARG . "\n";
    388          push @{$diversions{$curr_array}}, $ARG . "\n";
    389 
    390          last CONTINUED_LOOP unless / \\ $ /x;
    391 
    392        } while (<DATA>);
    393      }
    394    }
    395    else
    396    {
    397      warn_before();
    398    }
    399 
    400    next;
    401  }
    402 
    403  if (/$enum_element_re/)
    404  {
    405    end_curr_string();
    406    update_max_elem_size();
    407 
    408    $curr_enum_element = $1;
    409    $have_enum_element = 1;
    410    $curr_elem_size = 0;
    411 
    412    next;
    413  }
    414 
    415  if (/$string_re/)
    416  {
    417    if ($have_sections)
    418    {
    419      Die("strings and blocks can't be mixed in a section") if $have_blocks;
    420 
    421      # Save captured group from `string_re'.
    422      my $string = $1;
    423 
    424      if ($have_enum_element)
    425      {
    426        push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
    427        $have_enum_element = 0;
    428      }
    429 
    430      $string = convert_literal($string);
    431 
    432      push @{$diversions{$curr_array}}, "    $string\n";
    433 
    434      $have_strings = 1;
    435      $in_string = 1;
    436    }
    437    else
    438    {
    439      warn_before();
    440    }
    441 
    442    next;
    443  }
    444 
    445  if (/$block_start_re/)
    446  {
    447    if ($have_sections)
    448    {
    449      Die("strings and blocks can't be mixed in a section") if $have_strings;
    450 
    451      my $depth = 0;
    452      my $block = "";
    453      my $block_end = 0;
    454 
    455      # Count braces while getting the block.
    456    BRACE_LOOP:
    457      {
    458        do
    459        {
    460          strip_newline();
    461 
    462          foreach my $substring (split(/$brace_re/))
    463          {
    464            if ($block_end)
    465            {
    466              Die("invalid data after last matching closing brace")
    467                if $substring !~ /$whitespace_only_re/;
    468            }
    469 
    470            $block .= $substring;
    471 
    472            if ($substring eq '{')
    473            {
    474              $depth++;
    475            }
    476            elsif ($substring eq '}')
    477            {
    478              $depth--;
    479 
    480              $block_end = 1 if $depth == 0;
    481            }
    482          }
    483 
    484          # If we are here, we have run out of substrings, so get next line
    485          # or exit.
    486          last BRACE_LOOP if $block_end;
    487 
    488          $block .= "\n";
    489 
    490        } while (<DATA>);
    491      }
    492 
    493      if ($have_enum_element)
    494      {
    495        push @{$diversions{$curr_enum}}, enum_val_string($curr_enum_element);
    496        $have_enum_element = 0;
    497      }
    498 
    499      push @{$diversions{$curr_array}}, $block . ",\n";
    500 
    501      $curr_offset++;
    502      $curr_elem_size++;
    503 
    504      $have_blocks = 1;
    505    }
    506    else
    507    {
    508      warn_before();
    509    }
    510 
    511    next;
    512  }
    513 
    514  # Garbage.  We weren't able to parse the data.
    515  Die("syntax error");
    516 }
    517 
    518 # Finalize data.
    519 end_curr_string();
    520 update_max_elem_size();
    521 
    522 
    523 # Filter stdin to stdout, replacing `@...@' templates.
    524 
    525 sub emit_diversion
    526 {
    527  my $diversion_name = shift;
    528  return (exists($diversions{$1})) ? "@{$diversions{$1}}"
    529                                   : "@" . $diversion_name . "@";
    530 }
    531 
    532 
    533 $LIST_SEPARATOR = '';
    534 
    535 my $s1 = "This file has been generated by the Perl script \`$prog',";
    536 my $s1len = length $s1;
    537 my $s2 = "using data from file \`$datafile'.";
    538 my $s2len = length $s2;
    539 my $slen = ($s1len > $s2len) ? $s1len : $s2len;
    540 
    541 print "/* " . $s1 . " " x ($slen - $s1len) . " */\n"
    542      . "/* " . $s2 . " " x ($slen - $s2len) . " */\n"
    543      . "\n";
    544 
    545 while (<STDIN>)
    546 {
    547  s/ @ ( [A-Za-z0-9_]+? ) @ / emit_diversion($1) /egx;
    548  print;
    549 }
    550 
    551 # EOF