tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

grain_synthesis.c (64911B)


      1 /*
      2 * Copyright (c) 2016, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 /*!\file
     13 * \brief Describes film grain parameters and film grain synthesis
     14 *
     15 */
     16 
     17 #include <stdbool.h>
     18 #include <stdio.h>
     19 #include <string.h>
     20 #include <stdlib.h>
     21 #include <assert.h>
     22 #include "aom_dsp/aom_dsp_common.h"
     23 #include "aom_mem/aom_mem.h"
     24 #include "av1/decoder/grain_synthesis.h"
     25 
     26 // Samples with Gaussian distribution in the range of [-2048, 2047] (12 bits)
     27 // with zero mean and standard deviation of about 512.
     28 // The samples should be divided by 4 for the 10-bit range and by 16 for the
     28 // 8-bit range.
     29 static const int gaussian_sequence[2048] = {
     30  56,    568,   -180,  172,   124,   -84,   172,   -64,   -900,  24,   820,
     31  224,   1248,  996,   272,   -8,    -916,  -388,  -732,  -104,  -188, 800,
     32  112,   -652,  -320,  -376,  140,   -252,  492,   -168,  44,    -788, 588,
     33  -584,  500,   -228,  12,    680,   272,   -476,  972,   -100,  652,  368,
     34  432,   -196,  -720,  -192,  1000,  -332,  652,   -136,  -552,  -604, -4,
     35  192,   -220,  -136,  1000,  -52,   372,   -96,   -624,  124,   -24,  396,
     36  540,   -12,   -104,  640,   464,   244,   -208,  -84,   368,   -528, -740,
     37  248,   -968,  -848,  608,   376,   -60,   -292,  -40,   -156,  252,  -292,
     38  248,   224,   -280,  400,   -244,  244,   -60,   76,    -80,   212,  532,
     39  340,   128,   -36,   824,   -352,  -60,   -264,  -96,   -612,  416,  -704,
     40  220,   -204,  640,   -160,  1220,  -408,  900,   336,   20,    -336, -96,
     41  -792,  304,   48,    -28,   -1232, -1172, -448,  104,   -292,  -520, 244,
     42  60,    -948,  0,     -708,  268,   108,   356,   -548,  488,   -344, -136,
     43  488,   -196,  -224,  656,   -236,  -1128, 60,    4,     140,   276,  -676,
     44  -376,  168,   -108,  464,   8,     564,   64,    240,   308,   -300, -400,
     45  -456,  -136,  56,    120,   -408,  -116,  436,   504,   -232,  328,  844,
     46  -164,  -84,   784,   -168,  232,   -224,  348,   -376,  128,   568,  96,
     47  -1244, -288,  276,   848,   832,   -360,  656,   464,   -384,  -332, -356,
     48  728,   -388,  160,   -192,  468,   296,   224,   140,   -776,  -100, 280,
     49  4,     196,   44,    -36,   -648,  932,   16,    1428,  28,    528,  808,
     50  772,   20,    268,   88,    -332,  -284,  124,   -384,  -448,  208,  -228,
     51  -1044, -328,  660,   380,   -148,  -300,  588,   240,   540,   28,   136,
     52  -88,   -436,  256,   296,   -1000, 1400,  0,     -48,   1056,  -136, 264,
     53  -528,  -1108, 632,   -484,  -592,  -344,  796,   124,   -668,  -768, 388,
     54  1296,  -232,  -188,  -200,  -288,  -4,    308,   100,   -168,  256,  -500,
     55  204,   -508,  648,   -136,  372,   -272,  -120,  -1004, -552,  -548, -384,
     56  548,   -296,  428,   -108,  -8,    -912,  -324,  -224,  -88,   -112, -220,
     57  -100,  996,   -796,  548,   360,   -216,  180,   428,   -200,  -212, 148,
     58  96,    148,   284,   216,   -412,  -320,  120,   -300,  -384,  -604, -572,
     59  -332,  -8,    -180,  -176,  696,   116,   -88,   628,   76,    44,   -516,
     60  240,   -208,  -40,   100,   -592,  344,   -308,  -452,  -228,  20,   916,
     61  -1752, -136,  -340,  -804,  140,   40,    512,   340,   248,   184,  -492,
     62  896,   -156,  932,   -628,  328,   -688,  -448,  -616,  -752,  -100, 560,
     63  -1020, 180,   -800,  -64,   76,    576,   1068,  396,   660,   552,  -108,
     64  -28,   320,   -628,  312,   -92,   -92,   -472,  268,   16,    560,  516,
     65  -672,  -52,   492,   -100,  260,   384,   284,   292,   304,   -148, 88,
     66  -152,  1012,  1064,  -228,  164,   -376,  -684,  592,   -392,  156,  196,
     67  -524,  -64,   -884,  160,   -176,  636,   648,   404,   -396,  -436, 864,
     68  424,   -728,  988,   -604,  904,   -592,  296,   -224,  536,   -176, -920,
     69  436,   -48,   1176,  -884,  416,   -776,  -824,  -884,  524,   -548, -564,
     70  -68,   -164,  -96,   692,   364,   -692,  -1012, -68,   260,   -480, 876,
     71  -1116, 452,   -332,  -352,  892,   -1088, 1220,  -676,  12,    -292, 244,
     72  496,   372,   -32,   280,   200,   112,   -440,  -96,   24,    -644, -184,
     73  56,    -432,  224,   -980,  272,   -260,  144,   -436,  420,   356,  364,
     74  -528,  76,    172,   -744,  -368,  404,   -752,  -416,  684,   -688, 72,
     75  540,   416,   92,    444,   480,   -72,   -1416, 164,   -1172, -68,  24,
     76  424,   264,   1040,  128,   -912,  -524,  -356,  64,    876,   -12,  4,
     77  -88,   532,   272,   -524,  320,   276,   -508,  940,   24,    -400, -120,
     78  756,   60,    236,   -412,  100,   376,   -484,  400,   -100,  -740, -108,
     79  -260,  328,   -268,  224,   -200,  -416,  184,   -604,  -564,  -20,  296,
     80  60,    892,   -888,  60,    164,   68,    -760,  216,   -296,  904,  -336,
     81  -28,   404,   -356,  -568,  -208,  -1480, -512,  296,   328,   -360, -164,
     82  -1560, -776,  1156,  -428,  164,   -504,  -112,  120,   -216,  -148, -264,
     83  308,   32,    64,    -72,   72,    116,   176,   -64,   -272,  460,  -536,
     84  -784,  -280,  348,   108,   -752,  -132,  524,   -540,  -776,  116,  -296,
     85  -1196, -288,  -560,  1040,  -472,  116,   -848,  -1116, 116,   636,  696,
     86  284,   -176,  1016,  204,   -864,  -648,  -248,  356,   972,   -584, -204,
     87  264,   880,   528,   -24,   -184,  116,   448,   -144,  828,   524,  212,
     88  -212,  52,    12,    200,   268,   -488,  -404,  -880,  824,   -672, -40,
     89  908,   -248,  500,   716,   -576,  492,   -576,  16,    720,   -108, 384,
     90  124,   344,   280,   576,   -500,  252,   104,   -308,  196,   -188, -8,
     91  1268,  296,   1032,  -1196, 436,   316,   372,   -432,  -200,  -660, 704,
     92  -224,  596,   -132,  268,   32,    -452,  884,   104,   -1008, 424,  -1348,
     93  -280,  4,     -1168, 368,   476,   696,   300,   -8,    24,    180,  -592,
     94  -196,  388,   304,   500,   724,   -160,  244,   -84,   272,   -256, -420,
     95  320,   208,   -144,  -156,  156,   364,   452,   28,    540,   316,  220,
     96  -644,  -248,  464,   72,    360,   32,    -388,  496,   -680,  -48,  208,
     97  -116,  -408,  60,    -604,  -392,  548,   -840,  784,   -460,  656,  -544,
     98  -388,  -264,  908,   -800,  -628,  -612,  -568,  572,   -220,  164,  288,
     99  -16,   -308,  308,   -112,  -636,  -760,  280,   -668,  432,   364,  240,
    100  -196,  604,   340,   384,   196,   592,   -44,   -500,  432,   -580, -132,
    101  636,   -76,   392,   4,     -412,  540,   508,   328,   -356,  -36,  16,
    102  -220,  -64,   -248,  -60,   24,    -192,  368,   1040,  92,    -24,  -1044,
    103  -32,   40,    104,   148,   192,   -136,  -520,  56,    -816,  -224, 732,
    104  392,   356,   212,   -80,   -424,  -1008, -324,  588,   -1496, 576,  460,
    105  -816,  -848,  56,    -580,  -92,   -1372, -112,  -496,  200,   364,  52,
    106  -140,  48,    -48,   -60,   84,    72,    40,    132,   -356,  -268, -104,
    107  -284,  -404,  732,   -520,  164,   -304,  -540,  120,   328,   -76,  -460,
    108  756,   388,   588,   236,   -436,  -72,   -176,  -404,  -316,  -148, 716,
    109  -604,  404,   -72,   -88,   -888,  -68,   944,   88,    -220,  -344, 960,
    110  472,   460,   -232,  704,   120,   832,   -228,  692,   -508,  132,  -476,
    111  844,   -748,  -364,  -44,   1116,  -1104, -1056, 76,    428,   552,  -692,
    112  60,    356,   96,    -384,  -188,  -612,  -576,  736,   508,   892,  352,
    113  -1132, 504,   -24,   -352,  324,   332,   -600,  -312,  292,   508,  -144,
    114  -8,    484,   48,    284,   -260,  -240,  256,   -100,  -292,  -204, -44,
    115  472,   -204,  908,   -188,  -1000, -256,  92,    1164,  -392,  564,  356,
    116  652,   -28,   -884,  256,   484,   -192,  760,   -176,  376,   -524, -452,
    117  -436,  860,   -736,  212,   124,   504,   -476,  468,   76,    -472, 552,
    118  -692,  -944,  -620,  740,   -240,  400,   132,   20,    192,   -196, 264,
    119  -668,  -1012, -60,   296,   -316,  -828,  76,    -156,  284,   -768, -448,
    120  -832,  148,   248,   652,   616,   1236,  288,   -328,  -400,  -124, 588,
    121  220,   520,   -696,  1032,  768,   -740,  -92,   -272,  296,   448,  -464,
    122  412,   -200,  392,   440,   -200,  264,   -152,  -260,  320,   1032, 216,
    123  320,   -8,    -64,   156,   -1016, 1084,  1172,  536,   484,   -432, 132,
    124  372,   -52,   -256,  84,    116,   -352,  48,    116,   304,   -384, 412,
    125  924,   -300,  528,   628,   180,   648,   44,    -980,  -220,  1320, 48,
    126  332,   748,   524,   -268,  -720,  540,   -276,  564,   -344,  -208, -196,
    127  436,   896,   88,    -392,  132,   80,    -964,  -288,  568,   56,   -48,
    128  -456,  888,   8,     552,   -156,  -292,  948,   288,   128,   -716, -292,
    129  1192,  -152,  876,   352,   -600,  -260,  -812,  -468,  -28,   -120, -32,
    130  -44,   1284,  496,   192,   464,   312,   -76,   -516,  -380,  -456, -1012,
    131  -48,   308,   -156,  36,    492,   -156,  -808,  188,   1652,  68,   -120,
    132  -116,  316,   160,   -140,  352,   808,   -416,  592,   316,   -480, 56,
    133  528,   -204,  -568,  372,   -232,  752,   -344,  744,   -4,    324,  -416,
    134  -600,  768,   268,   -248,  -88,   -132,  -420,  -432,  80,    -288, 404,
    135  -316,  -1216, -588,  520,   -108,  92,    -320,  368,   -480,  -216, -92,
    136  1688,  -300,  180,   1020,  -176,  820,   -68,   -228,  -260,  436,  -904,
    137  20,    40,    -508,  440,   -736,  312,   332,   204,   760,   -372, 728,
    138  96,    -20,   -632,  -520,  -560,  336,   1076,  -64,   -532,  776,  584,
    139  192,   396,   -728,  -520,  276,   -188,  80,    -52,   -612,  -252, -48,
    140  648,   212,   -688,  228,   -52,   -260,  428,   -412,  -272,  -404, 180,
    141  816,   -796,  48,    152,   484,   -88,   -216,  988,   696,   188,  -528,
    142  648,   -116,  -180,  316,   476,   12,    -564,  96,    476,   -252, -364,
    143  -376,  -392,  556,   -256,  -576,  260,   -352,  120,   -16,   -136, -260,
    144  -492,  72,    556,   660,   580,   616,   772,   436,   424,   -32,  -324,
    145  -1268, 416,   -324,  -80,   920,   160,   228,   724,   32,    -516, 64,
    146  384,   68,    -128,  136,   240,   248,   -204,  -68,   252,   -932, -120,
    147  -480,  -628,  -84,   192,   852,   -404,  -288,  -132,  204,   100,  168,
    148  -68,   -196,  -868,  460,   1080,  380,   -80,   244,   0,     484,  -888,
    149  64,    184,   352,   600,   460,   164,   604,   -196,  320,   -64,  588,
    150  -184,  228,   12,    372,   48,    -848,  -344,  224,   208,   -200, 484,
    151  128,   -20,   272,   -468,  -840,  384,   256,   -720,  -520,  -464, -580,
    152  112,   -120,  644,   -356,  -208,  -608,  -528,  704,   560,   -424, 392,
    153  828,   40,    84,    200,   -152,  0,     -144,  584,   280,   -120, 80,
    154  -556,  -972,  -196,  -472,  724,   80,    168,   -32,   88,    160,  -688,
    155  0,     160,   356,   372,   -776,  740,   -128,  676,   -248,  -480, 4,
    156  -364,  96,    544,   232,   -1032, 956,   236,   356,   20,    -40,  300,
    157  24,    -676,  -596,  132,   1120,  -104,  532,   -1096, 568,   648,  444,
    158  508,   380,   188,   -376,  -604,  1488,  424,   24,    756,   -220, -192,
    159  716,   120,   920,   688,   168,   44,    -460,  568,   284,   1144, 1160,
    160  600,   424,   888,   656,   -356,  -320,  220,   316,   -176,  -724, -188,
    161  -816,  -628,  -348,  -228,  -380,  1012,  -452,  -660,  736,   928,  404,
    162  -696,  -72,   -268,  -892,  128,   184,   -344,  -780,  360,   336,  400,
    163  344,   428,   548,   -112,  136,   -228,  -216,  -820,  -516,  340,  92,
    164  -136,  116,   -300,  376,   -244,  100,   -316,  -520,  -284,  -12,  824,
    165  164,   -548,  -180,  -128,  116,   -924,  -828,  268,   -368,  -580, 620,
    166  192,   160,   0,     -1676, 1068,  424,   -56,   -360,  468,   -156, 720,
    167  288,   -528,  556,   -364,  548,   -148,  504,   316,   152,   -648, -620,
    168  -684,  -24,   -376,  -384,  -108,  -920,  -1032, 768,   180,   -264, -508,
    169  -1268, -260,  -60,   300,   -240,  988,   724,   -376,  -576,  -212, -736,
    170  556,   192,   1092,  -620,  -880,  376,   -56,   -4,    -216,  -32,  836,
    171  268,   396,   1332,  864,   -600,  100,   56,    -412,  -92,   356,  180,
    172  884,   -468,  -436,  292,   -388,  -804,  -704,  -840,  368,   -348, 140,
    173  -724,  1536,  940,   372,   112,   -372,  436,   -480,  1136,  296,  -32,
    174  -228,  132,   -48,   -220,  868,   -1016, -60,   -1044, -464,  328,  916,
    175  244,   12,    -736,  -296,  360,   468,   -376,  -108,  -92,   788,  368,
    176  -56,   544,   400,   -672,  -420,  728,   16,    320,   44,    -284, -380,
    177  -796,  488,   132,   204,   -596,  -372,  88,    -152,  -908,  -636, -572,
    178  -624,  -116,  -692,  -200,  -56,   276,   -88,   484,   -324,  948,  864,
    179  1000,  -456,  -184,  -276,  292,   -296,  156,   676,   320,   160,  908,
    180  -84,   -1236, -288,  -116,  260,   -372,  -644,  732,   -756,  -96,  84,
    181  344,   -520,  348,   -688,  240,   -84,   216,   -1044, -136,  -676, -396,
    182  -1500, 960,   -40,   176,   168,   1516,  420,   -504,  -344,  -364, -360,
    183  1216,  -940,  -380,  -212,  252,   -660,  -708,  484,   -444,  -152, 928,
    184  -120,  1112,  476,   -260,  560,   -148,  -344,  108,   -196,  228,  -288,
    185  504,   560,   -328,  -88,   288,   -1008, 460,   -228,  468,   -836, -196,
    186  76,    388,   232,   412,   -1168, -716,  -644,  756,   -172,  -356, -504,
    187  116,   432,   528,   48,    476,   -168,  -608,  448,   160,   -532, -272,
    188  28,    -676,  -12,   828,   980,   456,   520,   104,   -104,  256,  -344,
    189  -4,    -28,   -368,  -52,   -524,  -572,  -556,  -200,  768,   1124, -208,
    190  -512,  176,   232,   248,   -148,  -888,  604,   -600,  -304,  804,  -156,
    191  -212,  488,   -192,  -804,  -256,  368,   -360,  -916,  -328,  228,  -240,
    192  -448,  -472,  856,   -556,  -364,  572,   -12,   -156,  -368,  -340, 432,
    193  252,   -752,  -152,  288,   268,   -580,  -848,  -592,  108,   -76,  244,
    194  312,   -716,  592,   -80,   436,   360,   4,     -248,  160,   516,  584,
    195  732,   44,    -468,  -280,  -292,  -156,  -588,  28,    308,   912,  24,
    196  124,   156,   180,   -252,  944,   -924,  -772,  -520,  -428,  -624, 300,
    197  -212,  -1144, 32,    -724,  800,   -1128, -212,  -1288, -848,  180,  -416,
    198  440,   192,   -576,  -792,  -76,   -1080, 80,    -532,  -352,  -132, 380,
    199  -820,  148,   1112,  128,   164,   456,   700,   -924,  144,   -668, -384,
    200  648,   -832,  508,   552,   -52,   -100,  -656,  208,   -568,  748,  -88,
    201  680,   232,   300,   192,   -408,  -1012, -152,  -252,  -268,  272,  -876,
    202  -664,  -648,  -332,  -136,  16,    12,    1152,  -28,   332,   -536, 320,
    203  -672,  -460,  -316,  532,   -260,  228,   -40,   1052,  -816,  180,  88,
    204  -496,  -556,  -672,  -368,  428,   92,    356,   404,   -408,  252,  196,
    205  -176,  -556,  792,   268,   32,    372,   40,    96,    -332,  328,  120,
    206  372,   -900,  -40,   472,   -264,  -592,  952,   128,   656,   112,  664,
    207  -232,  420,   4,     -344,  -464,  556,   244,   -416,  -32,   252,  0,
    208  -412,  188,   -696,  508,   -476,  324,   -1096, 656,   -312,  560,  264,
    209  -136,  304,   160,   -64,   -580,  248,   336,   -720,  560,   -348, -288,
    210  -276,  -196,  -500,  852,   -544,  -236,  -1128, -992,  -776,  116,  56,
    211  52,    860,   884,   212,   -12,   168,   1020,  512,   -552,  924,  -148,
    212  716,   188,   164,   -340,  -520,  -184,  880,   -152,  -680,  -208, -1156,
    213  -300,  -528,  -472,  364,   100,   -744,  -1056, -32,   540,   280,  144,
    214  -676,  -32,   -232,  -280,  -224,  96,    568,   -76,   172,   148,  148,
    215  104,   32,    -296,  -32,   788,   -80,   32,    -16,   280,   288,  944,
    216  428,   -484
    217 };
    218 
    219 static const int gauss_bits = 11;
    220 
    221 static int luma_subblock_size_y = 32;
    222 static int luma_subblock_size_x = 32;
    223 
    224 static int chroma_subblock_size_y = 16;
    225 static int chroma_subblock_size_x = 16;
    226 
    227 static const int min_luma_legal_range = 16;
    228 static const int max_luma_legal_range = 235;
    229 
    230 static const int min_chroma_legal_range = 16;
    231 static const int max_chroma_legal_range = 240;
    232 
    233 static int scaling_lut_y[256];
    234 static int scaling_lut_cb[256];
    235 static int scaling_lut_cr[256];
    236 
    237 static int grain_min;
    238 static int grain_max;
    239 
    240 static uint16_t random_register = 0;  // random number generator register
    241 
    242 static void dealloc_arrays(const aom_film_grain_t *params, int ***pred_pos_luma,
    243                           int ***pred_pos_chroma, int **luma_grain_block,
    244                           int **cb_grain_block, int **cr_grain_block,
    245                           int **y_line_buf, int **cb_line_buf,
    246                           int **cr_line_buf, int **y_col_buf, int **cb_col_buf,
    247                           int **cr_col_buf) {
    248  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    249  int num_pos_chroma = num_pos_luma;
    250  if (params->num_y_points > 0) ++num_pos_chroma;
    251 
    252  if (*pred_pos_luma) {
    253    for (int row = 0; row < num_pos_luma; row++) {
    254      aom_free((*pred_pos_luma)[row]);
    255    }
    256    aom_free(*pred_pos_luma);
    257    *pred_pos_luma = NULL;
    258  }
    259 
    260  if (*pred_pos_chroma) {
    261    for (int row = 0; row < num_pos_chroma; row++) {
    262      aom_free((*pred_pos_chroma)[row]);
    263    }
    264    aom_free(*pred_pos_chroma);
    265    *pred_pos_chroma = NULL;
    266  }
    267 
    268  aom_free(*y_line_buf);
    269  *y_line_buf = NULL;
    270 
    271  aom_free(*cb_line_buf);
    272  *cb_line_buf = NULL;
    273 
    274  aom_free(*cr_line_buf);
    275  *cr_line_buf = NULL;
    276 
    277  aom_free(*y_col_buf);
    278  *y_col_buf = NULL;
    279 
    280  aom_free(*cb_col_buf);
    281  *cb_col_buf = NULL;
    282 
    283  aom_free(*cr_col_buf);
    284  *cr_col_buf = NULL;
    285 
    286  aom_free(*luma_grain_block);
    287  *luma_grain_block = NULL;
    288 
    289  aom_free(*cb_grain_block);
    290  *cb_grain_block = NULL;
    291 
    292  aom_free(*cr_grain_block);
    293  *cr_grain_block = NULL;
    294 }
    295 
    296 static bool init_arrays(const aom_film_grain_t *params, int luma_stride,
    297                        int chroma_stride, int ***pred_pos_luma_p,
    298                        int ***pred_pos_chroma_p, int **luma_grain_block,
    299                        int **cb_grain_block, int **cr_grain_block,
    300                        int **y_line_buf, int **cb_line_buf, int **cr_line_buf,
    301                        int **y_col_buf, int **cb_col_buf, int **cr_col_buf,
    302                        int luma_grain_samples, int chroma_grain_samples,
    303                        int chroma_subsamp_y, int chroma_subsamp_x) {
    304  *pred_pos_luma_p = NULL;
    305  *pred_pos_chroma_p = NULL;
    306  *luma_grain_block = NULL;
    307  *cb_grain_block = NULL;
    308  *cr_grain_block = NULL;
    309  *y_line_buf = NULL;
    310  *cb_line_buf = NULL;
    311  *cr_line_buf = NULL;
    312  *y_col_buf = NULL;
    313  *cb_col_buf = NULL;
    314  *cr_col_buf = NULL;
    315 
    316  memset(scaling_lut_y, 0, sizeof(*scaling_lut_y) * 256);
    317  memset(scaling_lut_cb, 0, sizeof(*scaling_lut_cb) * 256);
    318  memset(scaling_lut_cr, 0, sizeof(*scaling_lut_cr) * 256);
    319 
    320  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    321  int num_pos_chroma = num_pos_luma;
    322  if (params->num_y_points > 0) ++num_pos_chroma;
    323 
    324  int **pred_pos_luma;
    325  int **pred_pos_chroma;
    326 
    327  pred_pos_luma = (int **)aom_calloc(num_pos_luma, sizeof(*pred_pos_luma));
    328  if (!pred_pos_luma) return false;
    329 
    330  for (int row = 0; row < num_pos_luma; row++) {
    331    pred_pos_luma[row] = (int *)aom_malloc(sizeof(**pred_pos_luma) * 3);
    332    if (!pred_pos_luma[row]) {
    333      dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
    334                     luma_grain_block, cb_grain_block, cr_grain_block,
    335                     y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
    336                     cb_col_buf, cr_col_buf);
    337      return false;
    338    }
    339  }
    340 
    341  pred_pos_chroma =
    342      (int **)aom_calloc(num_pos_chroma, sizeof(*pred_pos_chroma));
    343  if (!pred_pos_chroma) {
    344    dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
    345                   cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
    346                   cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
    347    return false;
    348  }
    349 
    350  for (int row = 0; row < num_pos_chroma; row++) {
    351    pred_pos_chroma[row] = (int *)aom_malloc(sizeof(**pred_pos_chroma) * 3);
    352    if (!pred_pos_chroma[row]) {
    353      dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p,
    354                     luma_grain_block, cb_grain_block, cr_grain_block,
    355                     y_line_buf, cb_line_buf, cr_line_buf, y_col_buf,
    356                     cb_col_buf, cr_col_buf);
    357      return false;
    358    }
    359  }
    360 
    361  int pos_ar_index = 0;
    362 
    363  for (int row = -params->ar_coeff_lag; row < 0; row++) {
    364    for (int col = -params->ar_coeff_lag; col < params->ar_coeff_lag + 1;
    365         col++) {
    366      pred_pos_luma[pos_ar_index][0] = row;
    367      pred_pos_luma[pos_ar_index][1] = col;
    368      pred_pos_luma[pos_ar_index][2] = 0;
    369 
    370      pred_pos_chroma[pos_ar_index][0] = row;
    371      pred_pos_chroma[pos_ar_index][1] = col;
    372      pred_pos_chroma[pos_ar_index][2] = 0;
    373      ++pos_ar_index;
    374    }
    375  }
    376 
    377  for (int col = -params->ar_coeff_lag; col < 0; col++) {
    378    pred_pos_luma[pos_ar_index][0] = 0;
    379    pred_pos_luma[pos_ar_index][1] = col;
    380    pred_pos_luma[pos_ar_index][2] = 0;
    381 
    382    pred_pos_chroma[pos_ar_index][0] = 0;
    383    pred_pos_chroma[pos_ar_index][1] = col;
    384    pred_pos_chroma[pos_ar_index][2] = 0;
    385 
    386    ++pos_ar_index;
    387  }
    388 
    389  if (params->num_y_points > 0) {
    390    pred_pos_chroma[pos_ar_index][0] = 0;
    391    pred_pos_chroma[pos_ar_index][1] = 0;
    392    pred_pos_chroma[pos_ar_index][2] = 1;
    393  }
    394 
    395  *pred_pos_luma_p = pred_pos_luma;
    396  *pred_pos_chroma_p = pred_pos_chroma;
    397 
    398  *y_line_buf = (int *)aom_malloc(sizeof(**y_line_buf) * luma_stride * 2);
    399  *cb_line_buf = (int *)aom_malloc(sizeof(**cb_line_buf) * chroma_stride *
    400                                   (2 >> chroma_subsamp_y));
    401  *cr_line_buf = (int *)aom_malloc(sizeof(**cr_line_buf) * chroma_stride *
    402                                   (2 >> chroma_subsamp_y));
    403 
    404  *y_col_buf =
    405      (int *)aom_malloc(sizeof(**y_col_buf) * (luma_subblock_size_y + 2) * 2);
    406  *cb_col_buf =
    407      (int *)aom_malloc(sizeof(**cb_col_buf) *
    408                        (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
    409                        (2 >> chroma_subsamp_x));
    410  *cr_col_buf =
    411      (int *)aom_malloc(sizeof(**cr_col_buf) *
    412                        (chroma_subblock_size_y + (2 >> chroma_subsamp_y)) *
    413                        (2 >> chroma_subsamp_x));
    414 
    415  *luma_grain_block =
    416      (int *)aom_malloc(sizeof(**luma_grain_block) * luma_grain_samples);
    417  *cb_grain_block =
    418      (int *)aom_malloc(sizeof(**cb_grain_block) * chroma_grain_samples);
    419  *cr_grain_block =
    420      (int *)aom_malloc(sizeof(**cr_grain_block) * chroma_grain_samples);
    421  if (!(*pred_pos_luma_p && *pred_pos_chroma_p && *y_line_buf && *cb_line_buf &&
    422        *cr_line_buf && *y_col_buf && *cb_col_buf && *cr_col_buf &&
    423        *luma_grain_block && *cb_grain_block && *cr_grain_block)) {
    424    dealloc_arrays(params, pred_pos_luma_p, pred_pos_chroma_p, luma_grain_block,
    425                   cb_grain_block, cr_grain_block, y_line_buf, cb_line_buf,
    426                   cr_line_buf, y_col_buf, cb_col_buf, cr_col_buf);
    427    return false;
    428  }
    429  return true;
    430 }
    431 
    432 // get a number between 0 and 2^bits - 1
    433 static inline int get_random_number(int bits) {
    434  uint16_t bit;
    435  bit = ((random_register >> 0) ^ (random_register >> 1) ^
    436         (random_register >> 3) ^ (random_register >> 12)) &
    437        1;
    438  random_register = (random_register >> 1) | (bit << 15);
    439  return (random_register >> (16 - bits)) & ((1 << bits) - 1);
    440 }
    441 
    442 static void init_random_generator(int luma_line, uint16_t seed) {
    443  // same for the picture
    444 
    445  uint16_t msb = (seed >> 8) & 255;
    446  uint16_t lsb = seed & 255;
    447 
    448  random_register = (msb << 8) + lsb;
    449 
    450  //  changes for each row
    451  int luma_num = luma_line >> 5;
    452 
    453  random_register ^= ((luma_num * 37 + 178) & 255) << 8;
    454  random_register ^= ((luma_num * 173 + 105) & 255);
    455 }
    456 
    457 static void generate_luma_grain_block(
    458    const aom_film_grain_t *params, int **pred_pos_luma, int *luma_grain_block,
    459    int luma_block_size_y, int luma_block_size_x, int luma_grain_stride,
    460    int left_pad, int top_pad, int right_pad, int bottom_pad) {
    461  if (params->num_y_points == 0) {
    462    memset(luma_grain_block, 0,
    463           sizeof(*luma_grain_block) * luma_block_size_y * luma_grain_stride);
    464    return;
    465  }
    466 
    467  int bit_depth = params->bit_depth;
    468  int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
    469 
    470  int num_pos_luma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    471  int rounding_offset = (1 << (params->ar_coeff_shift - 1));
    472 
    473  for (int i = 0; i < luma_block_size_y; i++)
    474    for (int j = 0; j < luma_block_size_x; j++)
    475      luma_grain_block[i * luma_grain_stride + j] =
    476          (gaussian_sequence[get_random_number(gauss_bits)] +
    477           ((1 << gauss_sec_shift) >> 1)) >>
    478          gauss_sec_shift;
    479 
    480  for (int i = top_pad; i < luma_block_size_y - bottom_pad; i++)
    481    for (int j = left_pad; j < luma_block_size_x - right_pad; j++) {
    482      int wsum = 0;
    483      for (int pos = 0; pos < num_pos_luma; pos++) {
    484        wsum = wsum + params->ar_coeffs_y[pos] *
    485                          luma_grain_block[(i + pred_pos_luma[pos][0]) *
    486                                               luma_grain_stride +
    487                                           j + pred_pos_luma[pos][1]];
    488      }
    489      luma_grain_block[i * luma_grain_stride + j] =
    490          clamp(luma_grain_block[i * luma_grain_stride + j] +
    491                    ((wsum + rounding_offset) >> params->ar_coeff_shift),
    492                grain_min, grain_max);
    493    }
    494 }
    495 
    496 static bool generate_chroma_grain_blocks(
    497    const aom_film_grain_t *params, int **pred_pos_chroma,
    498    int *luma_grain_block, int *cb_grain_block, int *cr_grain_block,
    499    int luma_grain_stride, int chroma_block_size_y, int chroma_block_size_x,
    500    int chroma_grain_stride, int left_pad, int top_pad, int right_pad,
    501    int bottom_pad, int chroma_subsamp_y, int chroma_subsamp_x) {
    502  int bit_depth = params->bit_depth;
    503  int gauss_sec_shift = 12 - bit_depth + params->grain_scale_shift;
    504 
    505  int num_pos_chroma = 2 * params->ar_coeff_lag * (params->ar_coeff_lag + 1);
    506  if (params->num_y_points > 0) ++num_pos_chroma;
    507  int rounding_offset = (1 << (params->ar_coeff_shift - 1));
    508  int chroma_grain_block_size = chroma_block_size_y * chroma_grain_stride;
    509 
    510  if (params->num_cb_points || params->chroma_scaling_from_luma) {
    511    init_random_generator(7 << 5, params->random_seed);
    512 
    513    for (int i = 0; i < chroma_block_size_y; i++)
    514      for (int j = 0; j < chroma_block_size_x; j++)
    515        cb_grain_block[i * chroma_grain_stride + j] =
    516            (gaussian_sequence[get_random_number(gauss_bits)] +
    517             ((1 << gauss_sec_shift) >> 1)) >>
    518            gauss_sec_shift;
    519  } else {
    520    memset(cb_grain_block, 0,
    521           sizeof(*cb_grain_block) * chroma_grain_block_size);
    522  }
    523 
    524  if (params->num_cr_points || params->chroma_scaling_from_luma) {
    525    init_random_generator(11 << 5, params->random_seed);
    526 
    527    for (int i = 0; i < chroma_block_size_y; i++)
    528      for (int j = 0; j < chroma_block_size_x; j++)
    529        cr_grain_block[i * chroma_grain_stride + j] =
    530            (gaussian_sequence[get_random_number(gauss_bits)] +
    531             ((1 << gauss_sec_shift) >> 1)) >>
    532            gauss_sec_shift;
    533  } else {
    534    memset(cr_grain_block, 0,
    535           sizeof(*cr_grain_block) * chroma_grain_block_size);
    536  }
    537 
    538  for (int i = top_pad; i < chroma_block_size_y - bottom_pad; i++)
    539    for (int j = left_pad; j < chroma_block_size_x - right_pad; j++) {
    540      int wsum_cb = 0;
    541      int wsum_cr = 0;
    542      for (int pos = 0; pos < num_pos_chroma; pos++) {
    543        if (pred_pos_chroma[pos][2] == 0) {
    544          wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] *
    545                                  cb_grain_block[(i + pred_pos_chroma[pos][0]) *
    546                                                     chroma_grain_stride +
    547                                                 j + pred_pos_chroma[pos][1]];
    548          wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] *
    549                                  cr_grain_block[(i + pred_pos_chroma[pos][0]) *
    550                                                     chroma_grain_stride +
    551                                                 j + pred_pos_chroma[pos][1]];
    552        } else if (pred_pos_chroma[pos][2] == 1) {
    553          int av_luma = 0;
    554          int luma_coord_y = ((i - top_pad) << chroma_subsamp_y) + top_pad;
    555          int luma_coord_x = ((j - left_pad) << chroma_subsamp_x) + left_pad;
    556 
    557          for (int k = luma_coord_y; k < luma_coord_y + chroma_subsamp_y + 1;
    558               k++)
    559            for (int l = luma_coord_x; l < luma_coord_x + chroma_subsamp_x + 1;
    560                 l++)
    561              av_luma += luma_grain_block[k * luma_grain_stride + l];
    562 
    563          av_luma =
    564              (av_luma + ((1 << (chroma_subsamp_y + chroma_subsamp_x)) >> 1)) >>
    565              (chroma_subsamp_y + chroma_subsamp_x);
    566 
    567          wsum_cb = wsum_cb + params->ar_coeffs_cb[pos] * av_luma;
    568          wsum_cr = wsum_cr + params->ar_coeffs_cr[pos] * av_luma;
    569        } else {
    570          fprintf(
    571              stderr,
    572              "Grain synthesis: prediction between two chroma components is "
    573              "not supported!");
    574          return false;
    575        }
    576      }
    577      if (params->num_cb_points || params->chroma_scaling_from_luma)
    578        cb_grain_block[i * chroma_grain_stride + j] =
    579            clamp(cb_grain_block[i * chroma_grain_stride + j] +
    580                      ((wsum_cb + rounding_offset) >> params->ar_coeff_shift),
    581                  grain_min, grain_max);
    582      if (params->num_cr_points || params->chroma_scaling_from_luma)
    583        cr_grain_block[i * chroma_grain_stride + j] =
    584            clamp(cr_grain_block[i * chroma_grain_stride + j] +
    585                      ((wsum_cr + rounding_offset) >> params->ar_coeff_shift),
    586                  grain_min, grain_max);
    587    }
    588  return true;
    589 }
    590 
    591 static void init_scaling_function(const int scaling_points[][2], int num_points,
    592                                  int scaling_lut[]) {
    593  if (num_points == 0) return;
    594 
    595  for (int i = 0; i < scaling_points[0][0]; i++)
    596    scaling_lut[i] = scaling_points[0][1];
    597 
    598  for (int point = 0; point < num_points - 1; point++) {
    599    int delta_y = scaling_points[point + 1][1] - scaling_points[point][1];
    600    int delta_x = scaling_points[point + 1][0] - scaling_points[point][0];
    601 
    602    int64_t delta = delta_y * ((65536 + (delta_x >> 1)) / delta_x);
    603 
    604    for (int x = 0; x < delta_x; x++) {
    605      scaling_lut[scaling_points[point][0] + x] =
    606          scaling_points[point][1] + (int)((x * delta + 32768) >> 16);
    607    }
    608  }
    609 
    610  for (int i = scaling_points[num_points - 1][0]; i < 256; i++)
    611    scaling_lut[i] = scaling_points[num_points - 1][1];
    612 }
    613 
    614 // function that extracts samples from a LUT (and interpolates intemediate
    615 // frames for 10- and 12-bit video)
    616 static int scale_LUT(int *scaling_lut, int index, int bit_depth) {
    617  int x = index >> (bit_depth - 8);
    618 
    619  if (!(bit_depth - 8) || x == 255)
    620    return scaling_lut[x];
    621  else
    622    return scaling_lut[x] + (((scaling_lut[x + 1] - scaling_lut[x]) *
    623                                  (index & ((1 << (bit_depth - 8)) - 1)) +
    624                              (1 << (bit_depth - 9))) >>
    625                             (bit_depth - 8));
    626 }
    627 
    628 static void add_noise_to_block(const aom_film_grain_t *params, uint8_t *luma,
    629                               uint8_t *cb, uint8_t *cr, int luma_stride,
    630                               int chroma_stride, int *luma_grain,
    631                               int *cb_grain, int *cr_grain,
    632                               int luma_grain_stride, int chroma_grain_stride,
    633                               int half_luma_height, int half_luma_width,
    634                               int bit_depth, int chroma_subsamp_y,
    635                               int chroma_subsamp_x, int mc_identity) {
    636  int cb_mult = params->cb_mult - 128;            // fixed scale
    637  int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
    638  int cb_offset = params->cb_offset - 256;
    639 
    640  int cr_mult = params->cr_mult - 128;            // fixed scale
    641  int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
    642  int cr_offset = params->cr_offset - 256;
    643 
    644  int rounding_offset = (1 << (params->scaling_shift - 1));
    645 
    646  int apply_y = params->num_y_points > 0 ? 1 : 0;
    647  int apply_cb =
    648      (params->num_cb_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
    649  int apply_cr =
    650      (params->num_cr_points > 0 || params->chroma_scaling_from_luma) ? 1 : 0;
    651 
    652  if (params->chroma_scaling_from_luma) {
    653    cb_mult = 0;        // fixed scale
    654    cb_luma_mult = 64;  // fixed scale
    655    cb_offset = 0;
    656 
    657    cr_mult = 0;        // fixed scale
    658    cr_luma_mult = 64;  // fixed scale
    659    cr_offset = 0;
    660  }
    661 
    662  int min_luma, max_luma, min_chroma, max_chroma;
    663 
    664  if (params->clip_to_restricted_range) {
    665    min_luma = min_luma_legal_range;
    666    max_luma = max_luma_legal_range;
    667 
    668    if (mc_identity) {
    669      min_chroma = min_luma_legal_range;
    670      max_chroma = max_luma_legal_range;
    671    } else {
    672      min_chroma = min_chroma_legal_range;
    673      max_chroma = max_chroma_legal_range;
    674    }
    675  } else {
    676    min_luma = min_chroma = 0;
    677    max_luma = max_chroma = 255;
    678  }
    679 
    680  for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
    681    for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
    682      int average_luma = 0;
    683      if (chroma_subsamp_x) {
    684        average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
    685                             (j << chroma_subsamp_x)] +
    686                        luma[(i << chroma_subsamp_y) * luma_stride +
    687                             (j << chroma_subsamp_x) + 1] +
    688                        1) >>
    689                       1;
    690      } else {
    691        average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
    692      }
    693 
    694      if (apply_cb) {
    695        cb[i * chroma_stride + j] = clamp(
    696            cb[i * chroma_stride + j] +
    697                ((scale_LUT(scaling_lut_cb,
    698                            clamp(((average_luma * cb_luma_mult +
    699                                    cb_mult * cb[i * chroma_stride + j]) >>
    700                                   6) +
    701                                      cb_offset,
    702                                  0, (256 << (bit_depth - 8)) - 1),
    703                            8) *
    704                      cb_grain[i * chroma_grain_stride + j] +
    705                  rounding_offset) >>
    706                 params->scaling_shift),
    707            min_chroma, max_chroma);
    708      }
    709 
    710      if (apply_cr) {
    711        cr[i * chroma_stride + j] = clamp(
    712            cr[i * chroma_stride + j] +
    713                ((scale_LUT(scaling_lut_cr,
    714                            clamp(((average_luma * cr_luma_mult +
    715                                    cr_mult * cr[i * chroma_stride + j]) >>
    716                                   6) +
    717                                      cr_offset,
    718                                  0, (256 << (bit_depth - 8)) - 1),
    719                            8) *
    720                      cr_grain[i * chroma_grain_stride + j] +
    721                  rounding_offset) >>
    722                 params->scaling_shift),
    723            min_chroma, max_chroma);
    724      }
    725    }
    726  }
    727 
    728  if (apply_y) {
    729    for (int i = 0; i < (half_luma_height << 1); i++) {
    730      for (int j = 0; j < (half_luma_width << 1); j++) {
    731        luma[i * luma_stride + j] =
    732            clamp(luma[i * luma_stride + j] +
    733                      ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j], 8) *
    734                            luma_grain[i * luma_grain_stride + j] +
    735                        rounding_offset) >>
    736                       params->scaling_shift),
    737                  min_luma, max_luma);
    738      }
    739    }
    740  }
    741 }
    742 
    743 static void add_noise_to_block_hbd(
    744    const aom_film_grain_t *params, uint16_t *luma, uint16_t *cb, uint16_t *cr,
    745    int luma_stride, int chroma_stride, int *luma_grain, int *cb_grain,
    746    int *cr_grain, int luma_grain_stride, int chroma_grain_stride,
    747    int half_luma_height, int half_luma_width, int bit_depth,
    748    int chroma_subsamp_y, int chroma_subsamp_x, int mc_identity) {
    749  int cb_mult = params->cb_mult - 128;            // fixed scale
    750  int cb_luma_mult = params->cb_luma_mult - 128;  // fixed scale
    751  // offset value depends on the bit depth
    752  int cb_offset = (params->cb_offset << (bit_depth - 8)) - (1 << bit_depth);
    753 
    754  int cr_mult = params->cr_mult - 128;            // fixed scale
    755  int cr_luma_mult = params->cr_luma_mult - 128;  // fixed scale
    756  // offset value depends on the bit depth
    757  int cr_offset = (params->cr_offset << (bit_depth - 8)) - (1 << bit_depth);
    758 
    759  int rounding_offset = (1 << (params->scaling_shift - 1));
    760 
    761  int apply_y = params->num_y_points > 0 ? 1 : 0;
    762  int apply_cb =
    763      (params->num_cb_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
    764                                                                          : 0;
    765  int apply_cr =
    766      (params->num_cr_points > 0 || params->chroma_scaling_from_luma) > 0 ? 1
    767                                                                          : 0;
    768 
    769  if (params->chroma_scaling_from_luma) {
    770    cb_mult = 0;        // fixed scale
    771    cb_luma_mult = 64;  // fixed scale
    772    cb_offset = 0;
    773 
    774    cr_mult = 0;        // fixed scale
    775    cr_luma_mult = 64;  // fixed scale
    776    cr_offset = 0;
    777  }
    778 
    779  int min_luma, max_luma, min_chroma, max_chroma;
    780 
    781  if (params->clip_to_restricted_range) {
    782    min_luma = min_luma_legal_range << (bit_depth - 8);
    783    max_luma = max_luma_legal_range << (bit_depth - 8);
    784 
    785    if (mc_identity) {
    786      min_chroma = min_luma_legal_range << (bit_depth - 8);
    787      max_chroma = max_luma_legal_range << (bit_depth - 8);
    788    } else {
    789      min_chroma = min_chroma_legal_range << (bit_depth - 8);
    790      max_chroma = max_chroma_legal_range << (bit_depth - 8);
    791    }
    792  } else {
    793    min_luma = min_chroma = 0;
    794    max_luma = max_chroma = (256 << (bit_depth - 8)) - 1;
    795  }
    796 
    797  for (int i = 0; i < (half_luma_height << (1 - chroma_subsamp_y)); i++) {
    798    for (int j = 0; j < (half_luma_width << (1 - chroma_subsamp_x)); j++) {
    799      int average_luma = 0;
    800      if (chroma_subsamp_x) {
    801        average_luma = (luma[(i << chroma_subsamp_y) * luma_stride +
    802                             (j << chroma_subsamp_x)] +
    803                        luma[(i << chroma_subsamp_y) * luma_stride +
    804                             (j << chroma_subsamp_x) + 1] +
    805                        1) >>
    806                       1;
    807      } else {
    808        average_luma = luma[(i << chroma_subsamp_y) * luma_stride + j];
    809      }
    810 
    811      if (apply_cb) {
    812        cb[i * chroma_stride + j] = clamp(
    813            cb[i * chroma_stride + j] +
    814                ((scale_LUT(scaling_lut_cb,
    815                            clamp(((average_luma * cb_luma_mult +
    816                                    cb_mult * cb[i * chroma_stride + j]) >>
    817                                   6) +
    818                                      cb_offset,
    819                                  0, (256 << (bit_depth - 8)) - 1),
    820                            bit_depth) *
    821                      cb_grain[i * chroma_grain_stride + j] +
    822                  rounding_offset) >>
    823                 params->scaling_shift),
    824            min_chroma, max_chroma);
    825      }
    826      if (apply_cr) {
    827        cr[i * chroma_stride + j] = clamp(
    828            cr[i * chroma_stride + j] +
    829                ((scale_LUT(scaling_lut_cr,
    830                            clamp(((average_luma * cr_luma_mult +
    831                                    cr_mult * cr[i * chroma_stride + j]) >>
    832                                   6) +
    833                                      cr_offset,
    834                                  0, (256 << (bit_depth - 8)) - 1),
    835                            bit_depth) *
    836                      cr_grain[i * chroma_grain_stride + j] +
    837                  rounding_offset) >>
    838                 params->scaling_shift),
    839            min_chroma, max_chroma);
    840      }
    841    }
    842  }
    843 
    844  if (apply_y) {
    845    for (int i = 0; i < (half_luma_height << 1); i++) {
    846      for (int j = 0; j < (half_luma_width << 1); j++) {
    847        luma[i * luma_stride + j] =
    848            clamp(luma[i * luma_stride + j] +
    849                      ((scale_LUT(scaling_lut_y, luma[i * luma_stride + j],
    850                                  bit_depth) *
    851                            luma_grain[i * luma_grain_stride + j] +
    852                        rounding_offset) >>
    853                       params->scaling_shift),
    854                  min_luma, max_luma);
    855      }
    856    }
    857  }
    858 }
    859 
    860 static void copy_rect(uint8_t *src, int src_stride, uint8_t *dst,
    861                      int dst_stride, int width, int height,
    862                      int use_high_bit_depth) {
    863  int hbd_coeff = use_high_bit_depth ? 2 : 1;
    864  while (height) {
    865    memcpy(dst, src, width * sizeof(uint8_t) * hbd_coeff);
    866    src += src_stride;
    867    dst += dst_stride;
    868    --height;
    869  }
    870  return;
    871 }
    872 
    873 static void copy_area(int *src, int src_stride, int *dst, int dst_stride,
    874                      int width, int height) {
    875  while (height) {
    876    memcpy(dst, src, width * sizeof(*src));
    877    src += src_stride;
    878    dst += dst_stride;
    879    --height;
    880  }
    881  return;
    882 }
    883 
    884 static void extend_even(uint8_t *dst, int dst_stride, int width, int height,
    885                        int use_high_bit_depth) {
    886  if ((width & 1) == 0 && (height & 1) == 0) return;
    887  if (use_high_bit_depth) {
    888    uint16_t *dst16 = (uint16_t *)dst;
    889    int dst16_stride = dst_stride / 2;
    890    if (width & 1) {
    891      for (int i = 0; i < height; ++i)
    892        dst16[i * dst16_stride + width] = dst16[i * dst16_stride + width - 1];
    893    }
    894    width = (width + 1) & (~1);
    895    if (height & 1) {
    896      memcpy(&dst16[height * dst16_stride], &dst16[(height - 1) * dst16_stride],
    897             sizeof(*dst16) * width);
    898    }
    899  } else {
    900    if (width & 1) {
    901      for (int i = 0; i < height; ++i)
    902        dst[i * dst_stride + width] = dst[i * dst_stride + width - 1];
    903    }
    904    width = (width + 1) & (~1);
    905    if (height & 1) {
    906      memcpy(&dst[height * dst_stride], &dst[(height - 1) * dst_stride],
    907             sizeof(*dst) * width);
    908    }
    909  }
    910 }
    911 
    912 static void ver_boundary_overlap(int *left_block, int left_stride,
    913                                 int *right_block, int right_stride,
    914                                 int *dst_block, int dst_stride, int width,
    915                                 int height) {
    916  if (width == 1) {
    917    while (height) {
    918      *dst_block = clamp((*left_block * 23 + *right_block * 22 + 16) >> 5,
    919                         grain_min, grain_max);
    920      left_block += left_stride;
    921      right_block += right_stride;
    922      dst_block += dst_stride;
    923      --height;
    924    }
    925    return;
    926  } else if (width == 2) {
    927    while (height) {
    928      dst_block[0] = clamp((27 * left_block[0] + 17 * right_block[0] + 16) >> 5,
    929                           grain_min, grain_max);
    930      dst_block[1] = clamp((17 * left_block[1] + 27 * right_block[1] + 16) >> 5,
    931                           grain_min, grain_max);
    932      left_block += left_stride;
    933      right_block += right_stride;
    934      dst_block += dst_stride;
    935      --height;
    936    }
    937    return;
    938  }
    939 }
    940 
    941 static void hor_boundary_overlap(int *top_block, int top_stride,
    942                                 int *bottom_block, int bottom_stride,
    943                                 int *dst_block, int dst_stride, int width,
    944                                 int height) {
    945  if (height == 1) {
    946    while (width) {
    947      *dst_block = clamp((*top_block * 23 + *bottom_block * 22 + 16) >> 5,
    948                         grain_min, grain_max);
    949      ++top_block;
    950      ++bottom_block;
    951      ++dst_block;
    952      --width;
    953    }
    954    return;
    955  } else if (height == 2) {
    956    while (width) {
    957      dst_block[0] = clamp((27 * top_block[0] + 17 * bottom_block[0] + 16) >> 5,
    958                           grain_min, grain_max);
    959      dst_block[dst_stride] = clamp((17 * top_block[top_stride] +
    960                                     27 * bottom_block[bottom_stride] + 16) >>
    961                                        5,
    962                                    grain_min, grain_max);
    963      ++top_block;
    964      ++bottom_block;
    965      ++dst_block;
    966      --width;
    967    }
    968    return;
    969  }
    970 }
    971 
    972 /*!\brief Add film grain
    973 *
    974 * Add film grain to an image
    975 *
    976 * Returns 0 for success, -1 for failure
    977 *
    978 * \param[in]    grain_params     Grain parameters
    979 * \param[in]    luma             luma plane
    980 * \param[in]    cb               cb plane
    981 * \param[in]    cr               cr plane
    982 * \param[in]    height           luma plane height
    983 * \param[in]    width            luma plane width
    984 * \param[in]    luma_stride      luma plane stride
    985 * \param[in]    chroma_stride    chroma plane stride
    986 */
    987 static int add_film_grain_run(const aom_film_grain_t *params, uint8_t *luma,
    988                              uint8_t *cb, uint8_t *cr, int height, int width,
    989                              int luma_stride, int chroma_stride,
    990                              int use_high_bit_depth, int chroma_subsamp_y,
    991                              int chroma_subsamp_x, int mc_identity) {
    992  int **pred_pos_luma;
    993  int **pred_pos_chroma;
    994  int *luma_grain_block;
    995  int *cb_grain_block;
    996  int *cr_grain_block;
    997 
    998  int *y_line_buf;
    999  int *cb_line_buf;
   1000  int *cr_line_buf;
   1001 
   1002  int *y_col_buf;
   1003  int *cb_col_buf;
   1004  int *cr_col_buf;
   1005 
   1006  random_register = params->random_seed;
   1007 
   1008  int left_pad = 3;
   1009  int right_pad = 3;  // padding to offset for AR coefficients
   1010  int top_pad = 3;
   1011  int bottom_pad = 0;
   1012 
   1013  int ar_padding = 3;  // maximum lag used for stabilization of AR coefficients
   1014 
   1015  luma_subblock_size_y = 32;
   1016  luma_subblock_size_x = 32;
   1017 
   1018  chroma_subblock_size_y = luma_subblock_size_y >> chroma_subsamp_y;
   1019  chroma_subblock_size_x = luma_subblock_size_x >> chroma_subsamp_x;
   1020 
   1021  // Initial padding is only needed for generation of
   1022  // film grain templates (to stabilize the AR process)
   1023  // Only a 64x64 luma and 32x32 chroma part of a template
   1024  // is used later for adding grain, padding can be discarded
   1025 
   1026  int luma_block_size_y =
   1027      top_pad + 2 * ar_padding + luma_subblock_size_y * 2 + bottom_pad;
   1028  int luma_block_size_x = left_pad + 2 * ar_padding + luma_subblock_size_x * 2 +
   1029                          2 * ar_padding + right_pad;
   1030 
   1031  int chroma_block_size_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
   1032                            chroma_subblock_size_y * 2 + bottom_pad;
   1033  int chroma_block_size_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
   1034                            chroma_subblock_size_x * 2 +
   1035                            (2 >> chroma_subsamp_x) * ar_padding + right_pad;
   1036 
   1037  int luma_grain_stride = luma_block_size_x;
   1038  int chroma_grain_stride = chroma_block_size_x;
   1039 
   1040  int overlap = params->overlap_flag;
   1041  int bit_depth = params->bit_depth;
   1042 
   1043  const int grain_center = 128 << (bit_depth - 8);
   1044  grain_min = 0 - grain_center;
   1045  grain_max = grain_center - 1;
   1046 
   1047  if (!init_arrays(params, luma_stride, chroma_stride, &pred_pos_luma,
   1048                   &pred_pos_chroma, &luma_grain_block, &cb_grain_block,
   1049                   &cr_grain_block, &y_line_buf, &cb_line_buf, &cr_line_buf,
   1050                   &y_col_buf, &cb_col_buf, &cr_col_buf,
   1051                   luma_block_size_y * luma_block_size_x,
   1052                   chroma_block_size_y * chroma_block_size_x, chroma_subsamp_y,
   1053                   chroma_subsamp_x))
   1054    return -1;
   1055 
   1056  generate_luma_grain_block(params, pred_pos_luma, luma_grain_block,
   1057                            luma_block_size_y, luma_block_size_x,
   1058                            luma_grain_stride, left_pad, top_pad, right_pad,
   1059                            bottom_pad);
   1060 
   1061  if (!generate_chroma_grain_blocks(
   1062          params, pred_pos_chroma, luma_grain_block, cb_grain_block,
   1063          cr_grain_block, luma_grain_stride, chroma_block_size_y,
   1064          chroma_block_size_x, chroma_grain_stride, left_pad, top_pad,
   1065          right_pad, bottom_pad, chroma_subsamp_y, chroma_subsamp_x))
   1066    return -1;
   1067 
   1068  init_scaling_function(params->scaling_points_y, params->num_y_points,
   1069                        scaling_lut_y);
   1070 
   1071  if (params->chroma_scaling_from_luma) {
   1072    memcpy(scaling_lut_cb, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
   1073    memcpy(scaling_lut_cr, scaling_lut_y, sizeof(*scaling_lut_y) * 256);
   1074  } else {
   1075    init_scaling_function(params->scaling_points_cb, params->num_cb_points,
   1076                          scaling_lut_cb);
   1077    init_scaling_function(params->scaling_points_cr, params->num_cr_points,
   1078                          scaling_lut_cr);
   1079  }
   1080  for (int y = 0; y < height / 2; y += (luma_subblock_size_y >> 1)) {
   1081    init_random_generator(y * 2, params->random_seed);
   1082 
   1083    for (int x = 0; x < width / 2; x += (luma_subblock_size_x >> 1)) {
   1084      int offset_y = get_random_number(8);
   1085      int offset_x = (offset_y >> 4) & 15;
   1086      offset_y &= 15;
   1087 
   1088      int luma_offset_y = left_pad + 2 * ar_padding + (offset_y << 1);
   1089      int luma_offset_x = top_pad + 2 * ar_padding + (offset_x << 1);
   1090 
   1091      int chroma_offset_y = top_pad + (2 >> chroma_subsamp_y) * ar_padding +
   1092                            offset_y * (2 >> chroma_subsamp_y);
   1093      int chroma_offset_x = left_pad + (2 >> chroma_subsamp_x) * ar_padding +
   1094                            offset_x * (2 >> chroma_subsamp_x);
   1095 
   1096      if (overlap && x) {
   1097        ver_boundary_overlap(
   1098            y_col_buf, 2,
   1099            luma_grain_block + luma_offset_y * luma_grain_stride +
   1100                luma_offset_x,
   1101            luma_grain_stride, y_col_buf, 2, 2,
   1102            AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
   1103 
   1104        ver_boundary_overlap(
   1105            cb_col_buf, 2 >> chroma_subsamp_x,
   1106            cb_grain_block + chroma_offset_y * chroma_grain_stride +
   1107                chroma_offset_x,
   1108            chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
   1109            2 >> chroma_subsamp_x,
   1110            AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
   1111                   (height - (y << 1)) >> chroma_subsamp_y));
   1112 
   1113        ver_boundary_overlap(
   1114            cr_col_buf, 2 >> chroma_subsamp_x,
   1115            cr_grain_block + chroma_offset_y * chroma_grain_stride +
   1116                chroma_offset_x,
   1117            chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
   1118            2 >> chroma_subsamp_x,
   1119            AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
   1120                   (height - (y << 1)) >> chroma_subsamp_y));
   1121 
   1122        int i = y ? 1 : 0;
   1123 
   1124        if (use_high_bit_depth) {
   1125          add_noise_to_block_hbd(
   1126              params,
   1127              (uint16_t *)luma + ((y + i) << 1) * luma_stride + (x << 1),
   1128              (uint16_t *)cb +
   1129                  ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1130                  (x << (1 - chroma_subsamp_x)),
   1131              (uint16_t *)cr +
   1132                  ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1133                  (x << (1 - chroma_subsamp_x)),
   1134              luma_stride, chroma_stride, y_col_buf + i * 4,
   1135              cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
   1136              cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
   1137              2, (2 - chroma_subsamp_x),
   1138              AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
   1139              bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1140        } else {
   1141          add_noise_to_block(
   1142              params, luma + ((y + i) << 1) * luma_stride + (x << 1),
   1143              cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1144                  (x << (1 - chroma_subsamp_x)),
   1145              cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1146                  (x << (1 - chroma_subsamp_x)),
   1147              luma_stride, chroma_stride, y_col_buf + i * 4,
   1148              cb_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
   1149              cr_col_buf + i * (2 - chroma_subsamp_y) * (2 - chroma_subsamp_x),
   1150              2, (2 - chroma_subsamp_x),
   1151              AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i, 1,
   1152              bit_depth, chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1153        }
   1154      }
   1155 
   1156      if (overlap && y) {
   1157        if (x) {
   1158          hor_boundary_overlap(y_line_buf + (x << 1), luma_stride, y_col_buf, 2,
   1159                               y_line_buf + (x << 1), luma_stride, 2, 2);
   1160 
   1161          hor_boundary_overlap(cb_line_buf + x * (2 >> chroma_subsamp_x),
   1162                               chroma_stride, cb_col_buf, 2 >> chroma_subsamp_x,
   1163                               cb_line_buf + x * (2 >> chroma_subsamp_x),
   1164                               chroma_stride, 2 >> chroma_subsamp_x,
   1165                               2 >> chroma_subsamp_y);
   1166 
   1167          hor_boundary_overlap(cr_line_buf + x * (2 >> chroma_subsamp_x),
   1168                               chroma_stride, cr_col_buf, 2 >> chroma_subsamp_x,
   1169                               cr_line_buf + x * (2 >> chroma_subsamp_x),
   1170                               chroma_stride, 2 >> chroma_subsamp_x,
   1171                               2 >> chroma_subsamp_y);
   1172        }
   1173 
   1174        hor_boundary_overlap(
   1175            y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
   1176            luma_grain_block + luma_offset_y * luma_grain_stride +
   1177                luma_offset_x + (x ? 2 : 0),
   1178            luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
   1179            AOMMIN(luma_subblock_size_x - ((x ? 1 : 0) << 1),
   1180                   width - ((x ? x + 1 : 0) << 1)),
   1181            2);
   1182 
   1183        hor_boundary_overlap(
   1184            cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1185            chroma_stride,
   1186            cb_grain_block + chroma_offset_y * chroma_grain_stride +
   1187                chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
   1188            chroma_grain_stride,
   1189            cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1190            chroma_stride,
   1191            AOMMIN(chroma_subblock_size_x -
   1192                       ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
   1193                   (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
   1194            2 >> chroma_subsamp_y);
   1195 
   1196        hor_boundary_overlap(
   1197            cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1198            chroma_stride,
   1199            cr_grain_block + chroma_offset_y * chroma_grain_stride +
   1200                chroma_offset_x + ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
   1201            chroma_grain_stride,
   1202            cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1203            chroma_stride,
   1204            AOMMIN(chroma_subblock_size_x -
   1205                       ((x ? 1 : 0) << (1 - chroma_subsamp_x)),
   1206                   (width - ((x ? x + 1 : 0) << 1)) >> chroma_subsamp_x),
   1207            2 >> chroma_subsamp_y);
   1208 
   1209        if (use_high_bit_depth) {
   1210          add_noise_to_block_hbd(
   1211              params, (uint16_t *)luma + (y << 1) * luma_stride + (x << 1),
   1212              (uint16_t *)cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
   1213                  (x << ((1 - chroma_subsamp_x))),
   1214              (uint16_t *)cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
   1215                  (x << ((1 - chroma_subsamp_x))),
   1216              luma_stride, chroma_stride, y_line_buf + (x << 1),
   1217              cb_line_buf + (x << (1 - chroma_subsamp_x)),
   1218              cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
   1219              chroma_stride, 1,
   1220              AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
   1221              chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1222        } else {
   1223          add_noise_to_block(
   1224              params, luma + (y << 1) * luma_stride + (x << 1),
   1225              cb + (y << (1 - chroma_subsamp_y)) * chroma_stride +
   1226                  (x << ((1 - chroma_subsamp_x))),
   1227              cr + (y << (1 - chroma_subsamp_y)) * chroma_stride +
   1228                  (x << ((1 - chroma_subsamp_x))),
   1229              luma_stride, chroma_stride, y_line_buf + (x << 1),
   1230              cb_line_buf + (x << (1 - chroma_subsamp_x)),
   1231              cr_line_buf + (x << (1 - chroma_subsamp_x)), luma_stride,
   1232              chroma_stride, 1,
   1233              AOMMIN(luma_subblock_size_x >> 1, width / 2 - x), bit_depth,
   1234              chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1235        }
   1236      }
   1237 
   1238      int i = overlap && y ? 1 : 0;
   1239      int j = overlap && x ? 1 : 0;
   1240 
   1241      if (use_high_bit_depth) {
   1242        add_noise_to_block_hbd(
   1243            params,
   1244            (uint16_t *)luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
   1245            (uint16_t *)cb +
   1246                ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1247                ((x + j) << (1 - chroma_subsamp_x)),
   1248            (uint16_t *)cr +
   1249                ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1250                ((x + j) << (1 - chroma_subsamp_x)),
   1251            luma_stride, chroma_stride,
   1252            luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
   1253                luma_offset_x + (j << 1),
   1254            cb_grain_block +
   1255                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
   1256                    chroma_grain_stride +
   1257                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
   1258            cr_grain_block +
   1259                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
   1260                    chroma_grain_stride +
   1261                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
   1262            luma_grain_stride, chroma_grain_stride,
   1263            AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
   1264            AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
   1265            chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1266      } else {
   1267        add_noise_to_block(
   1268            params, luma + ((y + i) << 1) * luma_stride + ((x + j) << 1),
   1269            cb + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1270                ((x + j) << (1 - chroma_subsamp_x)),
   1271            cr + ((y + i) << (1 - chroma_subsamp_y)) * chroma_stride +
   1272                ((x + j) << (1 - chroma_subsamp_x)),
   1273            luma_stride, chroma_stride,
   1274            luma_grain_block + (luma_offset_y + (i << 1)) * luma_grain_stride +
   1275                luma_offset_x + (j << 1),
   1276            cb_grain_block +
   1277                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
   1278                    chroma_grain_stride +
   1279                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
   1280            cr_grain_block +
   1281                (chroma_offset_y + (i << (1 - chroma_subsamp_y))) *
   1282                    chroma_grain_stride +
   1283                chroma_offset_x + (j << (1 - chroma_subsamp_x)),
   1284            luma_grain_stride, chroma_grain_stride,
   1285            AOMMIN(luma_subblock_size_y >> 1, height / 2 - y) - i,
   1286            AOMMIN(luma_subblock_size_x >> 1, width / 2 - x) - j, bit_depth,
   1287            chroma_subsamp_y, chroma_subsamp_x, mc_identity);
   1288      }
   1289 
   1290      if (overlap) {
   1291        if (x) {
   1292          // Copy overlapped column buffer to line buffer
   1293          copy_area(y_col_buf + (luma_subblock_size_y << 1), 2,
   1294                    y_line_buf + (x << 1), luma_stride, 2, 2);
   1295 
   1296          copy_area(
   1297              cb_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
   1298              2 >> chroma_subsamp_x,
   1299              cb_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
   1300              2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
   1301 
   1302          copy_area(
   1303              cr_col_buf + (chroma_subblock_size_y << (1 - chroma_subsamp_x)),
   1304              2 >> chroma_subsamp_x,
   1305              cr_line_buf + (x << (1 - chroma_subsamp_x)), chroma_stride,
   1306              2 >> chroma_subsamp_x, 2 >> chroma_subsamp_y);
   1307        }
   1308 
   1309        // Copy grain to the line buffer for overlap with a bottom block
   1310        copy_area(
   1311            luma_grain_block +
   1312                (luma_offset_y + luma_subblock_size_y) * luma_grain_stride +
   1313                luma_offset_x + ((x ? 2 : 0)),
   1314            luma_grain_stride, y_line_buf + ((x ? x + 1 : 0) << 1), luma_stride,
   1315            AOMMIN(luma_subblock_size_x, width - (x << 1)) - (x ? 2 : 0), 2);
   1316 
   1317        copy_area(cb_grain_block +
   1318                      (chroma_offset_y + chroma_subblock_size_y) *
   1319                          chroma_grain_stride +
   1320                      chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
   1321                  chroma_grain_stride,
   1322                  cb_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1323                  chroma_stride,
   1324                  AOMMIN(chroma_subblock_size_x,
   1325                         ((width - (x << 1)) >> chroma_subsamp_x)) -
   1326                      (x ? 2 >> chroma_subsamp_x : 0),
   1327                  2 >> chroma_subsamp_y);
   1328 
   1329        copy_area(cr_grain_block +
   1330                      (chroma_offset_y + chroma_subblock_size_y) *
   1331                          chroma_grain_stride +
   1332                      chroma_offset_x + (x ? 2 >> chroma_subsamp_x : 0),
   1333                  chroma_grain_stride,
   1334                  cr_line_buf + ((x ? x + 1 : 0) << (1 - chroma_subsamp_x)),
   1335                  chroma_stride,
   1336                  AOMMIN(chroma_subblock_size_x,
   1337                         ((width - (x << 1)) >> chroma_subsamp_x)) -
   1338                      (x ? 2 >> chroma_subsamp_x : 0),
   1339                  2 >> chroma_subsamp_y);
   1340 
   1341        // Copy grain to the column buffer for overlap with the next block to
   1342        // the right
   1343 
   1344        copy_area(luma_grain_block + luma_offset_y * luma_grain_stride +
   1345                      luma_offset_x + luma_subblock_size_x,
   1346                  luma_grain_stride, y_col_buf, 2, 2,
   1347                  AOMMIN(luma_subblock_size_y + 2, height - (y << 1)));
   1348 
   1349        copy_area(cb_grain_block + chroma_offset_y * chroma_grain_stride +
   1350                      chroma_offset_x + chroma_subblock_size_x,
   1351                  chroma_grain_stride, cb_col_buf, 2 >> chroma_subsamp_x,
   1352                  2 >> chroma_subsamp_x,
   1353                  AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
   1354                         (height - (y << 1)) >> chroma_subsamp_y));
   1355 
   1356        copy_area(cr_grain_block + chroma_offset_y * chroma_grain_stride +
   1357                      chroma_offset_x + chroma_subblock_size_x,
   1358                  chroma_grain_stride, cr_col_buf, 2 >> chroma_subsamp_x,
   1359                  2 >> chroma_subsamp_x,
   1360                  AOMMIN(chroma_subblock_size_y + (2 >> chroma_subsamp_y),
   1361                         (height - (y << 1)) >> chroma_subsamp_y));
   1362      }
   1363    }
   1364  }
   1365 
   1366  dealloc_arrays(params, &pred_pos_luma, &pred_pos_chroma, &luma_grain_block,
   1367                 &cb_grain_block, &cr_grain_block, &y_line_buf, &cb_line_buf,
   1368                 &cr_line_buf, &y_col_buf, &cb_col_buf, &cr_col_buf);
   1369  return 0;
   1370 }
   1371 
   1372 int av1_add_film_grain(const aom_film_grain_t *params, const aom_image_t *src,
   1373                       aom_image_t *dst) {
   1374  uint8_t *luma, *cb, *cr;
   1375  int height, width, luma_stride, chroma_stride;
   1376  int use_high_bit_depth = 0;
   1377  int chroma_subsamp_x = 0;
   1378  int chroma_subsamp_y = 0;
   1379  int mc_identity = src->mc == AOM_CICP_MC_IDENTITY ? 1 : 0;
   1380 
   1381  switch (src->fmt) {
   1382    case AOM_IMG_FMT_AOMI420:
   1383    case AOM_IMG_FMT_I420:
   1384      use_high_bit_depth = 0;
   1385      chroma_subsamp_x = 1;
   1386      chroma_subsamp_y = 1;
   1387      break;
   1388    case AOM_IMG_FMT_I42016:
   1389      use_high_bit_depth = 1;
   1390      chroma_subsamp_x = 1;
   1391      chroma_subsamp_y = 1;
   1392      break;
   1393      //    case AOM_IMG_FMT_444A:
   1394    case AOM_IMG_FMT_I444:
   1395      use_high_bit_depth = 0;
   1396      chroma_subsamp_x = 0;
   1397      chroma_subsamp_y = 0;
   1398      break;
   1399    case AOM_IMG_FMT_I44416:
   1400      use_high_bit_depth = 1;
   1401      chroma_subsamp_x = 0;
   1402      chroma_subsamp_y = 0;
   1403      break;
   1404    case AOM_IMG_FMT_I422:
   1405      use_high_bit_depth = 0;
   1406      chroma_subsamp_x = 1;
   1407      chroma_subsamp_y = 0;
   1408      break;
   1409    case AOM_IMG_FMT_I42216:
   1410      use_high_bit_depth = 1;
   1411      chroma_subsamp_x = 1;
   1412      chroma_subsamp_y = 0;
   1413      break;
   1414    default:  // unknown input format
   1415      fprintf(stderr, "Film grain error: input format is not supported!");
   1416      return -1;
   1417  }
   1418 
   1419  assert(params->bit_depth == src->bit_depth);
   1420 
   1421  dst->fmt = src->fmt;
   1422  dst->bit_depth = src->bit_depth;
   1423 
   1424  dst->r_w = src->r_w;
   1425  dst->r_h = src->r_h;
   1426  dst->d_w = src->d_w;
   1427  dst->d_h = src->d_h;
   1428 
   1429  dst->cp = src->cp;
   1430  dst->tc = src->tc;
   1431  dst->mc = src->mc;
   1432 
   1433  dst->monochrome = src->monochrome;
   1434  dst->csp = src->csp;
   1435  dst->range = src->range;
   1436 
   1437  dst->x_chroma_shift = src->x_chroma_shift;
   1438  dst->y_chroma_shift = src->y_chroma_shift;
   1439 
   1440  dst->temporal_id = src->temporal_id;
   1441  dst->spatial_id = src->spatial_id;
   1442 
   1443  width = src->d_w % 2 ? src->d_w + 1 : src->d_w;
   1444  height = src->d_h % 2 ? src->d_h + 1 : src->d_h;
   1445 
   1446  copy_rect(src->planes[AOM_PLANE_Y], src->stride[AOM_PLANE_Y],
   1447            dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
   1448            src->d_h, use_high_bit_depth);
   1449  // Note that dst is already assumed to be aligned to even.
   1450  extend_even(dst->planes[AOM_PLANE_Y], dst->stride[AOM_PLANE_Y], src->d_w,
   1451              src->d_h, use_high_bit_depth);
   1452 
   1453  if (!src->monochrome) {
   1454    copy_rect(src->planes[AOM_PLANE_U], src->stride[AOM_PLANE_U],
   1455              dst->planes[AOM_PLANE_U], dst->stride[AOM_PLANE_U],
   1456              width >> chroma_subsamp_x, height >> chroma_subsamp_y,
   1457              use_high_bit_depth);
   1458 
   1459    copy_rect(src->planes[AOM_PLANE_V], src->stride[AOM_PLANE_V],
   1460              dst->planes[AOM_PLANE_V], dst->stride[AOM_PLANE_V],
   1461              width >> chroma_subsamp_x, height >> chroma_subsamp_y,
   1462              use_high_bit_depth);
   1463  }
   1464 
   1465  luma = dst->planes[AOM_PLANE_Y];
   1466  cb = dst->planes[AOM_PLANE_U];
   1467  cr = dst->planes[AOM_PLANE_V];
   1468 
   1469  // luma and chroma strides in samples
   1470  luma_stride = dst->stride[AOM_PLANE_Y] >> use_high_bit_depth;
   1471  chroma_stride = dst->stride[AOM_PLANE_U] >> use_high_bit_depth;
   1472 
   1473  return add_film_grain_run(params, luma, cb, cr, height, width, luma_stride,
   1474                            chroma_stride, use_high_bit_depth, chroma_subsamp_y,
   1475                            chroma_subsamp_x, mc_identity);
   1476 }