tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

cldr-quotes.pl (3898B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 
      5 # Tool to generate the cldr-quotes.inc file, to be #include'd in Quotes.cpp
      6 # to provide locale-appropriate opening and closing quote marks.
      7 
      8 # To regenerate cldr-quotes.inc for a new CLDR release, download the data file
      9 # "cldr-common-##.zip" from http://unicode.org/Public/cldr/latest into the
     10 # current directory, run
     11 #
     12 #   perl cldr-quotes.pl <filename>  > cldr-quotes.inc
     13 #
     14 # (where <filename> is the downloaded cldr-common-## archive), and
     15 # then use `hg diff` to check that the result looks sane.
     16 
     17 use warnings;
     18 use strict;
     19 
     20 use Encode;
     21 use IO::Uncompress::Unzip "unzip";
     22 
     23 die "Usage: perl cldr-quotes.pl <filename>" unless $#ARGV == 0;
     24 
     25 my $filename = $ARGV[0];
     26 
     27 my (%langQuotes, %quoteLangs);
     28 
     29 my $zip = IO::Uncompress::Unzip->new($filename) ||
     30  die "unzip failed: $IO::Uncompress::Unzip::UnzipError\n";
     31 
     32 my $status = 1;
     33 while ($status > 0) {
     34  my $name = $zip->getHeaderInfo()->{Name};
     35  if ($name =~ m@common/main/([A-Za-z0-9_]+)\.xml@) {
     36    my $lang = $1;
     37    $lang =~ s/_/-/;
     38 
     39    $langQuotes{$lang}[0] = "";
     40    $langQuotes{$lang}[1] = "";
     41    $langQuotes{$lang}[2] = "";
     42    $langQuotes{$lang}[3] = "";
     43 
     44    while (<$zip>) {
     45      $langQuotes{$lang}[0] = $1 if (m!<quotationStart>(.+)<!);
     46      $langQuotes{$lang}[1] = $1 if (m!<quotationEnd>(.+)<!);
     47      $langQuotes{$lang}[2] = $1 if (m!<alternateQuotationStart>(.+)<!);
     48      $langQuotes{$lang}[3] = $1 if (m!<alternateQuotationEnd>(.+)<!);
     49    }
     50  }
     51  $status = $zip->nextStream();
     52 }
     53 $zip->close;
     54 
     55 foreach my $lang (sort keys %langQuotes) {
     56  # We don't actually want to emit anything for the root locale
     57  next if $lang eq "root";
     58 
     59  # Inherit any missing entries from the locale's parent
     60  my $parent = $lang;
     61  while ($parent =~ m/\-/) {
     62    # Strip off a trailing subtag to find a parent locale code
     63    $parent =~ s/\-[^-]+$//;
     64    # Fill in any values available from the parent
     65    for (my $i = 0; $i < 4; $i++) {
     66      $langQuotes{$lang}[$i] = $langQuotes{$parent}[$i] unless $langQuotes{$lang}[$i];
     67    }
     68  }
     69 
     70  # Anything still missing is copied from the root locale
     71  for (my $i = 0; $i < 4; $i++) {
     72    $langQuotes{$lang}[$i] = $langQuotes{"root"}[$i] unless $langQuotes{$lang}[$i];
     73  }
     74 
     75  # If the locale ends up the same as its parent, skip
     76  next if ($parent ne $lang) && (exists $langQuotes{$parent}) &&
     77    (join(",", @{$langQuotes{$lang}}) eq join(",", @{$langQuotes{$parent}}));
     78 
     79  # Create a string with the C source form for the array of 4 quote characters
     80  my $quoteChars = join(", ", map { sprintf("0x%x", ord Encode::decode("UTF-8", $_)) } @{$langQuotes{$lang}});
     81 
     82  # Record this locale in the list of those which use this particular set of quotes
     83  $quoteLangs{$quoteChars} = [] unless exists $quoteLangs{$quoteChars};
     84  push @{$quoteLangs{$quoteChars}}, $lang;
     85 }
     86 
     87 # Output each unique list of quotes, with the string of associated locales
     88 my $timestamp = gmtime();
     89 print <<__EOT__;
     90 /* This Source Code Form is subject to the terms of the Mozilla Public
     91 * License, v. 2.0. If a copy of the MPL was not distributed with this
     92 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
     93 
     94 /*
     95 * Derived from the Unicode Common Locale Data Repository by cldr-quotes.pl.
     96 *
     97 * For terms of use, see http://www.unicode.org/copyright.html.
     98 */
     99 
    100 /*
    101 * Created on $timestamp from CLDR data file $filename.
    102 *
    103 * * * * * This file contains MACHINE-GENERATED DATA, do not edit! * * * * *
    104 *
    105 * (generated by intl/locale/cldr-quotes.pl)
    106 */
    107 
    108 __EOT__
    109 
    110 print "static const LangQuotesRec sLangQuotes[] = {\n";
    111 print "  // clang-format off\n";
    112 print sort map { sprintf("  { \"%s\\0\", { { %s } } },\n", join("\\0", sort @{$quoteLangs{$_}}), $_) } (keys %quoteLangs);
    113 print "  // clang-format on\n";
    114 print "};\n";