tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

txfm_gen_code.cc (19288B)


      1 /*
      2 * Copyright (c) 2018, Alliance for Open Media. All rights reserved.
      3 *
      4 * This source code is subject to the terms of the BSD 2 Clause License and
      5 * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
      6 * was not distributed with this source code in the LICENSE file, you can
      7 * obtain it at www.aomedia.org/license/software. If the Alliance for Open
      8 * Media Patent License 1.0 was not distributed with this source code in the
      9 * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
     10 */
     11 
     12 #include <stdio.h>
     13 #include <stdlib.h>
     14 #include <math.h>
     15 #include <float.h>
     16 #include <string.h>
     17 
     18 #include "tools/txfm_analyzer/txfm_graph.h"
     19 
     // Selects which flavour of transform source code the generator prints.
     typedef enum CODE_TYPE {
       CODE_TYPE_C = 0,      // handled by gen_code_c()
       CODE_TYPE_SSE2 = 1,   // handled by gen_code_sse2()
       CODE_TYPE_SSE4_1 = 2  // handled by gen_code_sse4_1()
     } CODE_TYPE;
     25 
     26 int get_cos_idx(double value, int mod) {
     27  return round(acos(fabs(value)) / PI * mod);
     28 }
     29 
     30 char *cos_text_arr(double value, int mod, char *text, int size) {
     31  int num = get_cos_idx(value, mod);
     32  if (value < 0) {
     33    snprintf(text, size, "-cospi[%2d]", num);
     34  } else {
     35    snprintf(text, size, " cospi[%2d]", num);
     36  }
     37 
     38  if (num == 0)
     39    printf("v: %f -> %d/%d v==-1 is %d\n", value, num, mod, value == -1);
     40 
     41  return text;
     42 }
     43 
     44 char *cos_text_sse2(double w0, double w1, int mod, char *text, int size) {
     45  int idx0 = get_cos_idx(w0, mod);
     46  int idx1 = get_cos_idx(w1, mod);
     47  char p[] = "p";
     48  char n[] = "m";
     49  char *sgn0 = w0 < 0 ? n : p;
     50  char *sgn1 = w1 < 0 ? n : p;
     51  snprintf(text, size, "cospi_%s%02d_%s%02d", sgn0, idx0, sgn1, idx1);
     52  return text;
     53 }
     54 
     55 char *cos_text_sse4_1(double w, int mod, char *text, int size) {
     56  int idx = get_cos_idx(w, mod);
     57  char p[] = "p";
     58  char n[] = "m";
     59  char *sgn = w < 0 ? n : p;
     60  snprintf(text, size, "cospi_%s%02d", sgn, idx);
     61  return text;
     62 }
     63 
     64 void node_to_code_c(Node *node, const char *buf0, const char *buf1) {
     65  int cnt = 0;
     66  for (int i = 0; i < 2; i++) {
     67    if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
     68  }
     69  if (cnt == 2) {
     70    int cnt2 = 0;
     71    printf("  %s[%d] =", buf1, node->nodeIdx);
     72    for (int i = 0; i < 2; i++) {
     73      if (fabs(node->inWeight[i]) == 1) {
     74        cnt2++;
     75      }
     76    }
     77    if (cnt2 == 2) {
     78      printf(" apply_value(");
     79    }
     80    int cnt1 = 0;
     81    for (int i = 0; i < 2; i++) {
     82      if (node->inWeight[i] == 1) {
     83        if (cnt1 > 0)
     84          printf(" + %s[%d]", buf0, node->inNodeIdx[i]);
     85        else
     86          printf(" %s[%d]", buf0, node->inNodeIdx[i]);
     87        cnt1++;
     88      } else if (node->inWeight[i] == -1) {
     89        if (cnt1 > 0)
     90          printf(" - %s[%d]", buf0, node->inNodeIdx[i]);
     91        else
     92          printf("-%s[%d]", buf0, node->inNodeIdx[i]);
     93        cnt1++;
     94      }
     95    }
     96    if (cnt2 == 2) {
     97      printf(", stage_range[stage])");
     98    }
     99    printf(";\n");
    100  } else {
    101    char w0[100];
    102    char w1[100];
    103    printf(
    104        "  %s[%d] = half_btf(%s, %s[%d], %s, %s[%d], "
    105        "cos_bit);\n",
    106        buf1, node->nodeIdx, cos_text_arr(node->inWeight[0], COS_MOD, w0, 100),
    107        buf0, node->inNodeIdx[0],
    108        cos_text_arr(node->inWeight[1], COS_MOD, w1, 100), buf0,
    109        node->inNodeIdx[1]);
    110  }
    111 }
    112 
    113 void gen_code_c(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
    114  char *fun_name = new char[100];
    115  get_fun_name(fun_name, 100, type, node_num);
    116 
    117  printf("\n");
    118  printf(
    119      "void av1_%s(const int32_t *input, int32_t *output, int8_t cos_bit, "
    120      "const int8_t* stage_range) "
    121      "{\n",
    122      fun_name);
    123  printf("  assert(output != input);\n");
    124  printf("  const int32_t size = %d;\n", node_num);
    125  printf("  const int32_t *cospi = cospi_arr(cos_bit);\n");
    126  printf("\n");
    127 
    128  printf("  int32_t stage = 0;\n");
    129  printf("  int32_t *bf0, *bf1;\n");
    130  printf("  int32_t step[%d];\n", node_num);
    131 
    132  const char *buf0 = "bf0";
    133  const char *buf1 = "bf1";
    134  const char *input = "input";
    135 
    136  int si = 0;
    137  printf("\n");
    138  printf("  // stage %d;\n", si);
    139  printf("  apply_range(stage, input, %s, size, stage_range[stage]);\n", input);
    140 
    141  si = 1;
    142  printf("\n");
    143  printf("  // stage %d;\n", si);
    144  printf("  stage++;\n");
    145  if (si % 2 == (stage_num - 1) % 2) {
    146    printf("  %s = output;\n", buf1);
    147  } else {
    148    printf("  %s = step;\n", buf1);
    149  }
    150 
    151  for (int ni = 0; ni < node_num; ni++) {
    152    int idx = get_idx(si, ni, node_num);
    153    node_to_code_c(node + idx, input, buf1);
    154  }
    155 
    156  printf("  range_check_buf(stage, input, bf1, size, stage_range[stage]);\n");
    157 
    158  for (int si = 2; si < stage_num; si++) {
    159    printf("\n");
    160    printf("  // stage %d\n", si);
    161    printf("  stage++;\n");
    162    if (si % 2 == (stage_num - 1) % 2) {
    163      printf("  %s = step;\n", buf0);
    164      printf("  %s = output;\n", buf1);
    165    } else {
    166      printf("  %s = output;\n", buf0);
    167      printf("  %s = step;\n", buf1);
    168    }
    169 
    170    // computation code
    171    for (int ni = 0; ni < node_num; ni++) {
    172      int idx = get_idx(si, ni, node_num);
    173      node_to_code_c(node + idx, buf0, buf1);
    174    }
    175 
    176    if (si != stage_num - 1) {
    177      printf(
    178          "  range_check_buf(stage, input, bf1, size, stage_range[stage]);\n");
    179    }
    180  }
    181  printf("  apply_range(stage, input, output, size, stage_range[stage]);\n");
    182  printf("}\n");
    183 }
    184 
    185 void single_node_to_code_sse2(Node *node, const char *buf0, const char *buf1) {
    186  printf("  %s[%2d] =", buf1, node->nodeIdx);
    187  if (node->inWeight[0] == 1 && node->inWeight[1] == 1) {
    188    printf(" _mm_adds_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0,
    189           node->inNodeIdx[1]);
    190  } else if (node->inWeight[0] == 1 && node->inWeight[1] == -1) {
    191    printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0,
    192           node->inNodeIdx[1]);
    193  } else if (node->inWeight[0] == -1 && node->inWeight[1] == 1) {
    194    printf(" _mm_subs_epi16(%s[%d], %s[%d])", buf0, node->inNodeIdx[1], buf0,
    195           node->inNodeIdx[0]);
    196  } else if (node->inWeight[0] == 1 && node->inWeight[1] == 0) {
    197    printf(" %s[%d]", buf0, node->inNodeIdx[0]);
    198  } else if (node->inWeight[0] == 0 && node->inWeight[1] == 1) {
    199    printf(" %s[%d]", buf0, node->inNodeIdx[1]);
    200  } else if (node->inWeight[0] == -1 && node->inWeight[1] == 0) {
    201    printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, node->inNodeIdx[0]);
    202  } else if (node->inWeight[0] == 0 && node->inWeight[1] == -1) {
    203    printf(" _mm_subs_epi16(__zero, %s[%d])", buf0, node->inNodeIdx[1]);
    204  }
    205  printf(";\n");
    206 }
    207 
    208 void pair_node_to_code_sse2(Node *node, Node *partnerNode, const char *buf0,
    209                            const char *buf1) {
    210  char temp0[100];
    211  char temp1[100];
    212  // btf_16_sse2_type0(w0, w1, in0, in1, out0, out1)
    213  if (node->inNodeIdx[0] != partnerNode->inNodeIdx[0])
    214    printf("  btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n",
    215           cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0,
    216                         100),
    217           cos_text_sse2(partnerNode->inWeight[1], partnerNode->inWeight[0],
    218                         COS_MOD, temp1, 100),
    219           buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1,
    220           node->nodeIdx, buf1, partnerNode->nodeIdx);
    221  else
    222    printf("  btf_16_sse2(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d]);\n",
    223           cos_text_sse2(node->inWeight[0], node->inWeight[1], COS_MOD, temp0,
    224                         100),
    225           cos_text_sse2(partnerNode->inWeight[0], partnerNode->inWeight[1],
    226                         COS_MOD, temp1, 100),
    227           buf0, node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1,
    228           node->nodeIdx, buf1, partnerNode->nodeIdx);
    229 }
    230 
    231 Node *get_partner_node(Node *node) {
    232  int diff = node->inNode[1]->nodeIdx - node->nodeIdx;
    233  return node + diff;
    234 }
    235 
    236 void node_to_code_sse2(Node *node, const char *buf0, const char *buf1) {
    237  int cnt = 0;
    238  int cnt1 = 0;
    239  if (node->visited == 0) {
    240    node->visited = 1;
    241    for (int i = 0; i < 2; i++) {
    242      if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
    243      if (fabs(node->inWeight[i]) == 1) cnt1++;
    244    }
    245    if (cnt == 2) {
    246      if (cnt1 == 2) {
    247        // has a partner
    248        Node *partnerNode = get_partner_node(node);
    249        partnerNode->visited = 1;
    250        single_node_to_code_sse2(node, buf0, buf1);
    251        single_node_to_code_sse2(partnerNode, buf0, buf1);
    252      } else {
    253        single_node_to_code_sse2(node, buf0, buf1);
    254      }
    255    } else {
    256      Node *partnerNode = get_partner_node(node);
    257      partnerNode->visited = 1;
    258      pair_node_to_code_sse2(node, partnerNode, buf0, buf1);
    259    }
    260  }
    261 }
    262 
    263 void gen_cospi_list_sse2(Node *node, int stage_num, int node_num) {
    264  int visited[65][65][2][2];
    265  memset(visited, 0, sizeof(visited));
    266  char text[100];
    267  char text1[100];
    268  char text2[100];
    269  int size = 100;
    270  printf("\n");
    271  for (int si = 1; si < stage_num; si++) {
    272    for (int ni = 0; ni < node_num; ni++) {
    273      int idx = get_idx(si, ni, node_num);
    274      int cnt = 0;
    275      Node *node0 = node + idx;
    276      if (node0->visited == 0) {
    277        node0->visited = 1;
    278        for (int i = 0; i < 2; i++) {
    279          if (fabs(node0->inWeight[i]) == 1 || fabs(node0->inWeight[i]) == 0)
    280            cnt++;
    281        }
    282        if (cnt != 2) {
    283          {
    284            double w0 = node0->inWeight[0];
    285            double w1 = node0->inWeight[1];
    286            int idx0 = get_cos_idx(w0, COS_MOD);
    287            int idx1 = get_cos_idx(w1, COS_MOD);
    288            int sgn0 = w0 < 0 ? 1 : 0;
    289            int sgn1 = w1 < 0 ? 1 : 0;
    290 
    291            if (!visited[idx0][idx1][sgn0][sgn1]) {
    292              visited[idx0][idx1][sgn0][sgn1] = 1;
    293              printf("  __m128i %s = pair_set_epi16(%s, %s);\n",
    294                     cos_text_sse2(w0, w1, COS_MOD, text, size),
    295                     cos_text_arr(w0, COS_MOD, text1, size),
    296                     cos_text_arr(w1, COS_MOD, text2, size));
    297            }
    298          }
    299          Node *node1 = get_partner_node(node0);
    300          node1->visited = 1;
    301          if (node1->inNode[0]->nodeIdx != node0->inNode[0]->nodeIdx) {
    302            double w0 = node1->inWeight[0];
    303            double w1 = node1->inWeight[1];
    304            int idx0 = get_cos_idx(w0, COS_MOD);
    305            int idx1 = get_cos_idx(w1, COS_MOD);
    306            int sgn0 = w0 < 0 ? 1 : 0;
    307            int sgn1 = w1 < 0 ? 1 : 0;
    308 
    309            if (!visited[idx1][idx0][sgn1][sgn0]) {
    310              visited[idx1][idx0][sgn1][sgn0] = 1;
    311              printf("  __m128i %s = pair_set_epi16(%s, %s);\n",
    312                     cos_text_sse2(w1, w0, COS_MOD, text, size),
    313                     cos_text_arr(w1, COS_MOD, text1, size),
    314                     cos_text_arr(w0, COS_MOD, text2, size));
    315            }
    316          } else {
    317            double w0 = node1->inWeight[0];
    318            double w1 = node1->inWeight[1];
    319            int idx0 = get_cos_idx(w0, COS_MOD);
    320            int idx1 = get_cos_idx(w1, COS_MOD);
    321            int sgn0 = w0 < 0 ? 1 : 0;
    322            int sgn1 = w1 < 0 ? 1 : 0;
    323 
    324            if (!visited[idx0][idx1][sgn0][sgn1]) {
    325              visited[idx0][idx1][sgn0][sgn1] = 1;
    326              printf("  __m128i %s = pair_set_epi16(%s, %s);\n",
    327                     cos_text_sse2(w0, w1, COS_MOD, text, size),
    328                     cos_text_arr(w0, COS_MOD, text1, size),
    329                     cos_text_arr(w1, COS_MOD, text2, size));
    330            }
    331          }
    332        }
    333      }
    334    }
    335  }
    336 }
    337 
    338 void gen_code_sse2(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
    339  char *fun_name = new char[100];
    340  get_fun_name(fun_name, 100, type, node_num);
    341 
    342  printf("\n");
    343  printf(
    344      "void %s_sse2(const __m128i *input, __m128i *output, int8_t cos_bit) "
    345      "{\n",
    346      fun_name);
    347 
    348  printf("  const int32_t* cospi = cospi_arr(cos_bit);\n");
    349  printf("  const __m128i __zero = _mm_setzero_si128();\n");
    350  printf("  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n");
    351 
    352  graph_reset_visited(node, stage_num, node_num);
    353  gen_cospi_list_sse2(node, stage_num, node_num);
    354  graph_reset_visited(node, stage_num, node_num);
    355  for (int si = 1; si < stage_num; si++) {
    356    char in[100];
    357    char out[100];
    358    printf("\n");
    359    printf("  // stage %d\n", si);
    360    if (si == 1)
    361      snprintf(in, 100, "%s", "input");
    362    else
    363      snprintf(in, 100, "x%d", si - 1);
    364    if (si == stage_num - 1) {
    365      snprintf(out, 100, "%s", "output");
    366    } else {
    367      snprintf(out, 100, "x%d", si);
    368      printf("  __m128i %s[%d];\n", out, node_num);
    369    }
    370    // computation code
    371    for (int ni = 0; ni < node_num; ni++) {
    372      int idx = get_idx(si, ni, node_num);
    373      node_to_code_sse2(node + idx, in, out);
    374    }
    375  }
    376 
    377  printf("}\n");
    378 }
    379 void gen_cospi_list_sse4_1(Node *node, int stage_num, int node_num) {
    380  int visited[65][2];
    381  memset(visited, 0, sizeof(visited));
    382  char text[100];
    383  char text1[100];
    384  int size = 100;
    385  printf("\n");
    386  for (int si = 1; si < stage_num; si++) {
    387    for (int ni = 0; ni < node_num; ni++) {
    388      int idx = get_idx(si, ni, node_num);
    389      Node *node0 = node + idx;
    390      if (node0->visited == 0) {
    391        int cnt = 0;
    392        node0->visited = 1;
    393        for (int i = 0; i < 2; i++) {
    394          if (fabs(node0->inWeight[i]) == 1 || fabs(node0->inWeight[i]) == 0)
    395            cnt++;
    396        }
    397        if (cnt != 2) {
    398          for (int i = 0; i < 2; i++) {
    399            if (fabs(node0->inWeight[i]) != 1 &&
    400                fabs(node0->inWeight[i]) != 0) {
    401              double w = node0->inWeight[i];
    402              int idx = get_cos_idx(w, COS_MOD);
    403              int sgn = w < 0 ? 1 : 0;
    404 
    405              if (!visited[idx][sgn]) {
    406                visited[idx][sgn] = 1;
    407                printf("  __m128i %s = _mm_set1_epi32(%s);\n",
    408                       cos_text_sse4_1(w, COS_MOD, text, size),
    409                       cos_text_arr(w, COS_MOD, text1, size));
    410              }
    411            }
    412          }
    413          Node *node1 = get_partner_node(node0);
    414          node1->visited = 1;
    415        }
    416      }
    417    }
    418  }
    419 }
    420 
    421 void single_node_to_code_sse4_1(Node *node, const char *buf0,
    422                                const char *buf1) {
    423  printf("  %s[%2d] =", buf1, node->nodeIdx);
    424  if (node->inWeight[0] == 1 && node->inWeight[1] == 1) {
    425    printf(" _mm_add_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0,
    426           node->inNodeIdx[1]);
    427  } else if (node->inWeight[0] == 1 && node->inWeight[1] == -1) {
    428    printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[0], buf0,
    429           node->inNodeIdx[1]);
    430  } else if (node->inWeight[0] == -1 && node->inWeight[1] == 1) {
    431    printf(" _mm_sub_epi32(%s[%d], %s[%d])", buf0, node->inNodeIdx[1], buf0,
    432           node->inNodeIdx[0]);
    433  } else if (node->inWeight[0] == 1 && node->inWeight[1] == 0) {
    434    printf(" %s[%d]", buf0, node->inNodeIdx[0]);
    435  } else if (node->inWeight[0] == 0 && node->inWeight[1] == 1) {
    436    printf(" %s[%d]", buf0, node->inNodeIdx[1]);
    437  } else if (node->inWeight[0] == -1 && node->inWeight[1] == 0) {
    438    printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, node->inNodeIdx[0]);
    439  } else if (node->inWeight[0] == 0 && node->inWeight[1] == -1) {
    440    printf(" _mm_sub_epi32(__zero, %s[%d])", buf0, node->inNodeIdx[1]);
    441  }
    442  printf(";\n");
    443 }
    444 
    445 void pair_node_to_code_sse4_1(Node *node, Node *partnerNode, const char *buf0,
    446                              const char *buf1) {
    447  char temp0[100];
    448  char temp1[100];
    449  if (node->inWeight[0] * partnerNode->inWeight[0] < 0) {
    450    /* type0
    451     * cos  sin
    452     * sin -cos
    453     */
    454    // btf_32_sse2_type0(w0, w1, in0, in1, out0, out1)
    455    // out0 = w0*in0 + w1*in1
    456    // out1 = -w0*in1 + w1*in0
    457    printf(
    458        "  btf_32_type0_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
    459        "__rounding, cos_bit);\n",
    460        cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
    461        cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
    462        node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
    463        partnerNode->nodeIdx);
    464  } else {
    465    /* type1
    466     *  cos sin
    467     * -sin cos
    468     */
    469    // btf_32_sse2_type1(w0, w1, in0, in1, out0, out1)
    470    // out0 = w0*in0 + w1*in1
    471    // out1 = w0*in1 - w1*in0
    472    printf(
    473        "  btf_32_type1_sse4_1_new(%s, %s, %s[%d], %s[%d], %s[%d], %s[%d], "
    474        "__rounding, cos_bit);\n",
    475        cos_text_sse4_1(node->inWeight[0], COS_MOD, temp0, 100),
    476        cos_text_sse4_1(node->inWeight[1], COS_MOD, temp1, 100), buf0,
    477        node->inNodeIdx[0], buf0, node->inNodeIdx[1], buf1, node->nodeIdx, buf1,
    478        partnerNode->nodeIdx);
    479  }
    480 }
    481 
    482 void node_to_code_sse4_1(Node *node, const char *buf0, const char *buf1) {
    483  int cnt = 0;
    484  int cnt1 = 0;
    485  if (node->visited == 0) {
    486    node->visited = 1;
    487    for (int i = 0; i < 2; i++) {
    488      if (fabs(node->inWeight[i]) == 1 || fabs(node->inWeight[i]) == 0) cnt++;
    489      if (fabs(node->inWeight[i]) == 1) cnt1++;
    490    }
    491    if (cnt == 2) {
    492      if (cnt1 == 2) {
    493        // has a partner
    494        Node *partnerNode = get_partner_node(node);
    495        partnerNode->visited = 1;
    496        single_node_to_code_sse4_1(node, buf0, buf1);
    497        single_node_to_code_sse4_1(partnerNode, buf0, buf1);
    498      } else {
    499        single_node_to_code_sse2(node, buf0, buf1);
    500      }
    501    } else {
    502      Node *partnerNode = get_partner_node(node);
    503      partnerNode->visited = 1;
    504      pair_node_to_code_sse4_1(node, partnerNode, buf0, buf1);
    505    }
    506  }
    507 }
    508 
    509 void gen_code_sse4_1(Node *node, int stage_num, int node_num, TYPE_TXFM type) {
    510  char *fun_name = new char[100];
    511  get_fun_name(fun_name, 100, type, node_num);
    512 
    513  printf("\n");
    514  printf(
    515      "void %s_sse4_1(const __m128i *input, __m128i *output, int8_t cos_bit) "
    516      "{\n",
    517      fun_name);
    518 
    519  printf("  const int32_t* cospi = cospi_arr(cos_bit);\n");
    520  printf("  const __m128i __zero = _mm_setzero_si128();\n");
    521  printf("  const __m128i __rounding = _mm_set1_epi32(1 << (cos_bit - 1));\n");
    522 
    523  graph_reset_visited(node, stage_num, node_num);
    524  gen_cospi_list_sse4_1(node, stage_num, node_num);
    525  graph_reset_visited(node, stage_num, node_num);
    526  for (int si = 1; si < stage_num; si++) {
    527    char in[100];
    528    char out[100];
    529    printf("\n");
    530    printf("  // stage %d\n", si);
    531    if (si == 1)
    532      snprintf(in, 100, "%s", "input");
    533    else
    534      snprintf(in, 100, "x%d", si - 1);
    535    if (si == stage_num - 1) {
    536      snprintf(out, 100, "%s", "output");
    537    } else {
    538      snprintf(out, 100, "x%d", si);
    539      printf("  __m128i %s[%d];\n", out, node_num);
    540    }
    541    // computation code
    542    for (int ni = 0; ni < node_num; ni++) {
    543      int idx = get_idx(si, ni, node_num);
    544      node_to_code_sse4_1(node + idx, in, out);
    545    }
    546  }
    547 
    548  printf("}\n");
    549 }
    550 
    551 void gen_hybrid_code(CODE_TYPE code_type, TYPE_TXFM txfm_type, int node_num) {
    552  int stage_num = get_hybrid_stage_num(txfm_type, node_num);
    553 
    554  Node *node = new Node[node_num * stage_num];
    555  init_graph(node, stage_num, node_num);
    556 
    557  gen_hybrid_graph_1d(node, stage_num, node_num, 0, 0, node_num, txfm_type);
    558 
    559  switch (code_type) {
    560    case CODE_TYPE_C: gen_code_c(node, stage_num, node_num, txfm_type); break;
    561    case CODE_TYPE_SSE2:
    562      gen_code_sse2(node, stage_num, node_num, txfm_type);
    563      break;
    564    case CODE_TYPE_SSE4_1:
    565      gen_code_sse4_1(node, stage_num, node_num, txfm_type);
    566      break;
    567  }
    568 
    569  delete[] node;
    570 }
    571 
    572 int main(int argc, char **argv) {
    573  CODE_TYPE code_type = CODE_TYPE_SSE4_1;
    574  for (int txfm_type = TYPE_DCT; txfm_type < TYPE_LAST; txfm_type++) {
    575    for (int node_num = 4; node_num <= 64; node_num *= 2) {
    576      gen_hybrid_code(code_type, (TYPE_TXFM)txfm_type, node_num);
    577    }
    578  }
    579  return 0;
    580 }