tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

visual_metrics.py (16931B)


      1 #!/usr/bin/python
      2 #
      3 # Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      4 #
      5 # This source code is subject to the terms of the BSD 2 Clause License and
      6 # the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      7 # was not distributed with this source code in the LICENSE file, you can
      8 # obtain it at www.aomedia.org/license/software. If the Alliance for Open
      9 # Media Patent License 1.0 was not distributed with this source code in the
     10 # PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     11 #
     12 
     13 """Converts video encoding result data from text files to visualization
     14 data source."""
     15 
     16 __author__ = "jzern@google.com (James Zern),"
     17 __author__ += "jimbankoski@google.com (Jim Bankoski)"
     18 
     19 import fnmatch
     20 import numpy as np
     21 import scipy as sp
     22 import scipy.interpolate
     23 import os
     24 import re
     25 import string
     26 import sys
     27 import math
     28 import warnings
     29 
     30 import gviz_api
     31 
     32 from os.path import basename
     33 from os.path import splitext
     34 
     35 warnings.simplefilter('ignore', np.RankWarning)
     36 warnings.simplefilter('ignore', RuntimeWarning)
     37 
     38 def bdsnr2(metric_set1, metric_set2):
     39  """
     40  BJONTEGAARD    Bjontegaard metric calculation adapted
     41  Bjontegaard's snr metric allows to compute the average % saving in decibels
     42  between two rate-distortion curves [1].  This is an adaptation of that
     43  method that fixes inconsistencies when the curve fit operation goes awry
     44  by replacing the curve fit function with a Piecewise Cubic Hermite
     45  Interpolating Polynomial and then integrating that by evaluating that
     46  function at small intervals using the trapezoid method to calculate
     47  the integral.
     48 
     49  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
     50  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
     51  """
     52 
     53  if not metric_set1 or not metric_set2:
     54    return 0.0
     55 
     56  try:
     57 
     58    # pchip_interlopate requires keys sorted by x axis. x-axis will
     59    # be our metric not the bitrate so sort by metric.
     60    metric_set1.sort()
     61    metric_set2.sort()
     62 
     63    # Pull the log of the rate and clamped psnr from metric_sets.
     64    log_rate1 = [math.log(x[0]) for x in metric_set1]
     65    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
     66    log_rate2 = [math.log(x[0]) for x in metric_set2]
     67    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
     68 
     69    # Integration interval.  This metric only works on the area that's
     70    # overlapping.   Extrapolation of these things is sketchy so we avoid.
     71    min_int = max([min(log_rate1), min(log_rate2)])
     72    max_int = min([max(log_rate1), max(log_rate2)])
     73 
     74    # No overlap means no sensible metric possible.
     75    if max_int <= min_int:
     76      return 0.0
     77 
     78    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
     79    # create 100 new samples points separated by interval.
     80    lin = np.linspace(min_int, max_int, num=100, retstep=True)
     81    interval = lin[1]
     82    samples = lin[0]
     83    v1 = scipy.interpolate.pchip_interpolate(log_rate1, metric1, samples)
     84    v2 = scipy.interpolate.pchip_interpolate(log_rate2, metric2, samples)
     85 
     86    # Calculate the integral using the trapezoid method on the samples.
     87    int_v1 = np.trapz(v1, dx=interval)
     88    int_v2 = np.trapz(v2, dx=interval)
     89 
     90    # Calculate the average improvement.
     91    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
     92 
     93  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
     94    return 0
     95 
     96  return avg_exp_diff
     97 
     98 def bdrate2(metric_set1, metric_set2):
     99  """
    100  BJONTEGAARD    Bjontegaard metric calculation adapted
    101  Bjontegaard's metric allows to compute the average % saving in bitrate
    102  between two rate-distortion curves [1].  This is an adaptation of that
    103  method that fixes inconsistencies when the curve fit operation goes awry
    104  by replacing the curve fit function with a Piecewise Cubic Hermite
    105  Interpolating Polynomial and then integrating that by evaluating that
    106  function at small intervals using the trapezoid method to calculate
    107  the integral.
    108 
    109  metric_set1 - list of tuples ( bitrate,  metric ) for first graph
    110  metric_set2 - list of tuples ( bitrate,  metric ) for second graph
    111  """
    112 
    113  if not metric_set1 or not metric_set2:
    114    return 0.0
    115 
    116  try:
    117 
    118    # pchip_interlopate requires keys sorted by x axis. x-axis will
    119    # be our metric not the bitrate so sort by metric.
    120    metric_set1.sort(key=lambda tup: tup[1])
    121    metric_set2.sort(key=lambda tup: tup[1])
    122 
    123    # Pull the log of the rate and clamped psnr from metric_sets.
    124    log_rate1 = [math.log(x[0]) for x in metric_set1]
    125    metric1 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set1]
    126    log_rate2 = [math.log(x[0]) for x in metric_set2]
    127    metric2 = [100.0 if x[1] == float('inf') else x[1] for x in metric_set2]
    128 
    129    # Integration interval.  This metric only works on the area that's
    130    # overlapping.   Extrapolation of these things is sketchy so we avoid.
    131    min_int = max([min(metric1), min(metric2)])
    132    max_int = min([max(metric1), max(metric2)])
    133 
    134    # No overlap means no sensible metric possible.
    135    if max_int <= min_int:
    136      return 0.0
    137 
    138    # Use Piecewise Cubic Hermite Interpolating Polynomial interpolation to
    139    # create 100 new samples points separated by interval.
    140    lin = np.linspace(min_int, max_int, num=100, retstep=True)
    141    interval = lin[1]
    142    samples = lin[0]
    143    v1 = scipy.interpolate.pchip_interpolate(metric1, log_rate1, samples)
    144    v2 = scipy.interpolate.pchip_interpolate(metric2, log_rate2, samples)
    145 
    146    # Calculate the integral using the trapezoid method on the samples.
    147    int_v1 = np.trapz(v1, dx=interval)
    148    int_v2 = np.trapz(v2, dx=interval)
    149 
    150    # Calculate the average improvement.
    151    avg_exp_diff = (int_v2 - int_v1) / (max_int - min_int)
    152 
    153  except (TypeError, ZeroDivisionError, ValueError, np.RankWarning) as e:
    154    return 0
    155 
    156  # Convert to a percentage.
    157  avg_diff = (math.exp(avg_exp_diff) - 1) * 100
    158 
    159  return avg_diff
    160 
    161 
    162 
    163 def FillForm(string_for_substitution, dictionary_of_vars):
    164  """
    165  This function substitutes all matches of the command string //%% ... %%//
    166  with the variable represented by ...  .
    167  """
    168  return_string = string_for_substitution
    169  for i in re.findall("//%%(.*)%%//", string_for_substitution):
    170    return_string = re.sub("//%%" + i + "%%//", dictionary_of_vars[i],
    171                           return_string)
    172  return return_string
    173 
    174 
    175 def HasMetrics(line):
    176  """
    177  The metrics files produced by aomenc are started with a B for headers.
    178  """
    179  # If the first char of the first word on the line is a digit
    180  if len(line) == 0:
    181    return False
    182  if len(line.split()) == 0:
    183    return False
    184  if line.split()[0][0:1].isdigit():
    185    return True
    186  return False
    187 
    188 def GetMetrics(file_name):
    189  metric_file = open(file_name, "r")
    190  return metric_file.readline().split();
    191 
    192 def ParseMetricFile(file_name, metric_column):
    193  metric_set1 = set([])
    194  metric_file = open(file_name, "r")
    195  for line in metric_file:
    196    metrics = string.split(line)
    197    if HasMetrics(line):
    198      if metric_column < len(metrics):
    199        try:
    200          tuple = float(metrics[0]), float(metrics[metric_column])
    201        except:
    202          tuple = float(metrics[0]), 0
    203      else:
    204        tuple = float(metrics[0]), 0
    205      metric_set1.add(tuple)
    206  metric_set1_sorted = sorted(metric_set1)
    207  return metric_set1_sorted
    208 
    209 
    210 def FileBetter(file_name_1, file_name_2, metric_column, method):
    211  """
    212  Compares two data files and determines which is better and by how
    213  much. Also produces a histogram of how much better, by PSNR.
    214  metric_column is the metric.
    215  """
    216  # Store and parse our two files into lists of unique tuples.
    217 
    218  # Read the two files, parsing out lines starting with bitrate.
    219  metric_set1_sorted = ParseMetricFile(file_name_1, metric_column)
    220  metric_set2_sorted = ParseMetricFile(file_name_2, metric_column)
    221 
    222 
    223  def GraphBetter(metric_set1_sorted, metric_set2_sorted, base_is_set_2):
    224    """
    225    Search through the sorted metric file for metrics on either side of
    226    the metric from file 1.  Since both lists are sorted we really
    227    should not have to search through the entire range, but these
    228    are small files."""
    229    total_bitrate_difference_ratio = 0.0
    230    count = 0
    231    for bitrate, metric in metric_set1_sorted:
    232      if bitrate == 0:
    233        continue
    234      for i in range(len(metric_set2_sorted) - 1):
    235        s2_bitrate_0, s2_metric_0 = metric_set2_sorted[i]
    236        s2_bitrate_1, s2_metric_1 = metric_set2_sorted[i + 1]
    237        # We have a point on either side of our metric range.
    238        if metric > s2_metric_0 and metric <= s2_metric_1:
    239 
    240          # Calculate a slope.
    241          if s2_metric_1 - s2_metric_0 != 0:
    242            metric_slope = ((s2_bitrate_1 - s2_bitrate_0) /
    243                            (s2_metric_1 - s2_metric_0))
    244          else:
    245            metric_slope = 0
    246 
    247          estimated_s2_bitrate = (s2_bitrate_0 + (metric - s2_metric_0) *
    248                                  metric_slope)
    249 
    250          if estimated_s2_bitrate == 0:
    251            continue
    252          # Calculate percentage difference as given by base.
    253          if base_is_set_2 == 0:
    254            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
    255                                        bitrate)
    256          else:
    257            bitrate_difference_ratio = ((bitrate - estimated_s2_bitrate) /
    258                                        estimated_s2_bitrate)
    259 
    260          total_bitrate_difference_ratio += bitrate_difference_ratio
    261          count += 1
    262          break
    263 
    264    # Calculate the average improvement between graphs.
    265    if count != 0:
    266      avg = total_bitrate_difference_ratio / count
    267 
    268    else:
    269      avg = 0.0
    270 
    271    return avg
    272 
    273  # Be fair to both graphs by testing all the points in each.
    274  if method == 'avg':
    275    avg_improvement = 50 * (
    276                       GraphBetter(metric_set1_sorted, metric_set2_sorted, 1) -
    277                       GraphBetter(metric_set2_sorted, metric_set1_sorted, 0))
    278  elif method == 'dsnr':
    279      avg_improvement = bdsnr2(metric_set1_sorted, metric_set2_sorted)
    280  else:
    281      avg_improvement = bdrate2(metric_set2_sorted, metric_set1_sorted)
    282 
    283  return avg_improvement
    284 
    285 
def HandleFiles(variables):
  """
  This script creates html for displaying metric data produced from data
  in a video stats file,  as created by the AOM project when enable_psnr
  is turned on:

  Usage: visual_metrics.py template.html pattern base_dir sub_dir [ sub_dir2 ..]

  The script parses each metrics file [see below] that matches the
  statfile_pattern  in the baseline directory and looks for the file that
  matches that same file in each of the sub_dirs, and compares the resultant
  metrics bitrate, avg psnr, glb psnr, and ssim. "

  It provides a table in which each row is a file in the line directory,
  and a column for each subdir, with the cells representing how that clip
  compares to baseline for that subdir.   A graph is given for each which
  compares file size to that metric.  If you click on a point in the graph it
  zooms in on that point.

  a SAMPLE metrics file:

  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   25.911   38.242   38.104   38.258   38.121   75.790    14103
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   49.982   41.264   41.129   41.255   41.122   83.993    19817
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
   74.967   42.911   42.767   42.899   42.756   87.928    17332
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  100.012   43.983   43.838   43.881   43.738   89.695    25389
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  149.980   45.338   45.203   45.184   45.043   91.591    25438
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  199.852   46.225   46.123   46.113   45.999   92.679    28302
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  249.922   46.864   46.773   46.777   46.673   93.334    27244
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  299.998   47.366   47.281   47.317   47.220   93.844    27137
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  349.769   47.746   47.677   47.722   47.648   94.178    32226
  Bitrate  AVGPsnr  GLBPsnr  AVPsnrP  GLPsnrP  VPXSSIM    Time(us)
  399.773   48.032   47.971   48.013   47.946   94.362    36203

  sample use:
  visual_metrics.py template.html "*stt" aom aom_b aom_c > metrics.html
  """

  # The template file is the html file into which we will write the
  # data from the stats file, formatted correctly for the gviz_api.
  template_file = open(variables[1], "r")
  page_template = template_file.read()
  template_file.close()

  # This is the path match pattern for finding stats files amongst
  # all the other files it could be.  eg: *.stt
  file_pattern = variables[2]

  # This is the directory with files that we will use to do the comparison
  # against.
  baseline_dir = variables[3]
  snrs = ''
  filestable = {}

  filestable['dsnr'] = ''
  filestable['drate'] = ''
  filestable['avg'] = ''

  # Dirs is directories after the baseline to compare to the base.
  dirs = variables[4:len(variables)]

  # Find the metric files in the baseline directory.
  dir_list = sorted(fnmatch.filter(os.listdir(baseline_dir), file_pattern))

  # Column names come from the header line of the first baseline file;
  # raises IndexError if no file matched the pattern.
  metrics = GetMetrics(baseline_dir + "/" + dir_list[0])

  # NOTE(review): metrics_js and many locals below (snrs, filestable_avg,
  # filestable_dpsnr, filestable_drate, formatters, ...) look unused, but
  # FillForm() is fed vars() at the end, so the html template pulls them
  # in by name.  Do not "clean up" apparently dead locals here.
  metrics_js = 'metrics = ["' + '", "'.join(metrics) + '"];'

  # Column 0 is the bitrate; build one table per metric column and method.
  for column in range(1, len(metrics)):

    for metric in ['avg','dsnr','drate']:
      description = {"file": ("string", "File")}

      # Go through each directory and add a column header to our description.
      countoverall = {}
      sumoverall = {}

      for directory in dirs:
        description[directory] = ("number", directory)
        countoverall[directory] = 0
        sumoverall[directory] = 0

      # Data holds the data for the visualization, name given comes from
      # gviz_api sample code.
      data = []
      for filename in dir_list:
        row = {'file': splitext(basename(filename))[0] }
        baseline_file_name = baseline_dir + "/" + filename

        # Read the metric file from each of the directories in our list.
        for directory in dirs:
          metric_file_name = directory + "/" + filename

          # If there is a metric file in the current directory, open it
          # and calculate its overall difference between it and the baseline
          # directory's metric file.
          if os.path.isfile(metric_file_name):
            overall = FileBetter(baseline_file_name, metric_file_name,
                                 column, metric)
            row[directory] = overall

            sumoverall[directory] += overall
            countoverall[directory] += 1

        data.append(row)

      # Add the overall numbers.
      # NOTE(review): divides by countoverall[directory], which is 0 when a
      # subdir contained none of the baseline's files - presumably every
      # subdir mirrors the baseline file set; confirm with callers.
      row = {"file": "OVERALL" }
      for directory in dirs:
        row[directory] = sumoverall[directory] / countoverall[directory]
      data.append(row)

      # write the tables out
      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)

      # Accumulate javascript assignments like
      # "filestable_avg[1]=<json>" for the template to eval.
      filestable[metric] = ( filestable[metric] + "filestable_" + metric +
                             "[" + str(column) + "]=" +
                             data_table.ToJSon(columns_order=["file"]+dirs) + "\n" )

    # Exposed to the template via vars() in FillForm below.
    filestable_avg = filestable['avg']
    filestable_dpsnr = filestable['dsnr']
    filestable_drate = filestable['drate']

    # Now we collect all the data for all the graphs.  First the column
    # headers which will be Datarate and then each directory.
    columns = ("datarate",baseline_dir)
    description = {"datarate":("number", "Datarate")}
    for directory in dirs:
      description[directory] = ("number", directory)

    description[baseline_dir] = ("number", baseline_dir)

    snrs = snrs + "snrs[" + str(column) + "] = ["

    # Now collect the data for the graphs, file by file.
    for filename in dir_list:

      data = []

      # Collect the file in each directory and store all of its metrics
      # in the associated gviz metrics table.
      all_dirs = dirs + [baseline_dir]
      for directory in all_dirs:

        metric_file_name = directory + "/" + filename
        if not os.path.isfile(metric_file_name):
          continue

        # Read and parse the metrics file storing it to the data we'll
        # use for the gviz_api.Datatable.
        # NOTE(review): this rebinds the outer 'metrics' (the header names)
        # and 'metric' (the method name) loop variables - harmless today
        # because both are re-derived before reuse, but fragile.
        metrics = ParseMetricFile(metric_file_name, column)
        for bitrate, metric in metrics:
          data.append({"datarate": bitrate, directory: metric})

      data_table = gviz_api.DataTable(description)
      data_table.LoadData(data)
      snrs = snrs + "'" + data_table.ToJSon(
         columns_order=tuple(["datarate",baseline_dir]+dirs)) + "',"

    snrs = snrs + "]\n"

    formatters = ""
    for i in range(len(dirs)):
      formatters = "%s   formatter.format(better, %d);" % (formatters, i+1)

  # Substitute every //%%name%%// token in the template with the matching
  # local variable via vars().  (Python 2 print statement.)
  print FillForm(page_template, vars())
  return
    462 
    463 if len(sys.argv) < 3:
    464  print HandleFiles.__doc__
    465 else:
    466  HandleFiles(sys.argv)