tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

jsopcode.py (10593B)


      1 #!/usr/bin/env python3 -B
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      4 # You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 import re
      7 
      8 quoted_pat = re.compile(r"([^A-Za-z0-9]|^)'([^']+)'")
      9 js_pat = re.compile(r"([^A-Za-z0-9]|^)(JS[A-Z0-9_\*]+)")
     10 
     11 
     12 def codify(text):
     13    text = re.sub(quoted_pat, "\\1<code>\\2</code>", text)
     14    text = re.sub(js_pat, "\\1<code>\\2</code>", text)
     15 
     16    return text
     17 
     18 
     19 space_star_space_pat = re.compile(r"^\s*\* ?", re.M)
     20 
     21 
     22 def get_comment_body(comment):
     23    return re.sub(space_star_space_pat, "", comment).split("\n")
     24 
     25 
     26 quote_pat = re.compile('"([^"]+)"')
     27 str_pat = re.compile("js_([^_]+)_str")
     28 
     29 
     30 def parse_name(s):
     31    m = quote_pat.search(s)
     32    if m:
     33        return m.group(1)
     34    m = str_pat.search(s)
     35    if m:
     36        return m.group(1)
     37    return s
     38 
     39 
     40 csv_pat = re.compile(", *")
     41 
     42 
     43 def parse_csv(s):
     44    a = csv_pat.split(s)
     45    if len(a) == 1 and a[0] == "":
     46        return []
     47    return a
     48 
     49 
     50 def get_stack_count(stack):
     51    if stack == "":
     52        return 0
     53    if "..." in stack:
     54        return -1
     55    return len(stack.split(","))
     56 
     57 
     58 def parse_index(comment):
     59    index = []
     60    current_types = None
     61    category_name = ""
     62    category_pat = re.compile(r"\[([^\]]+)\]")
     63    for line in get_comment_body(comment):
     64        m = category_pat.search(line)
     65        if m:
     66            category_name = m.group(1)
     67            if category_name == "Index":
     68                continue
     69            current_types = []
     70            index.append((category_name, current_types))
     71        else:
     72            type_name = line.strip()
     73            if type_name and current_types is not None:
     74                current_types.append((type_name, []))
     75 
     76    return index
     77 
     78 
     79 # Holds the information stored in the comment with the following format:
     80 #   /*
     81 #    * {desc}
     82 #    *   Category: {category_name}
     83 #    *   Type: {type_name}
     84 #    *   Operands: {operands}
     85 #    *   Stack: {stack_uses} => {stack_defs}
     86 #    */
     87 
     88 
     89 class CommentInfo:
     90    def __init__(self):
     91        self.desc = ""
     92        self.category_name = ""
     93        self.type_name = ""
     94        self.operands = ""
     95        self.stack_uses = ""
     96        self.stack_defs = ""
     97 
     98 
     99 # Holds the information stored in the macro with the following format:
    100 #   MACRO({op}, {op_snake}, {token}, {length}, {nuses}, {ndefs}, {format})
    101 # and the information from CommentInfo.
    102 
    103 
    104 class OpcodeInfo:
    105    def __init__(self, value, comment_info):
    106        self.op = ""
    107        self.op_snake = ""
    108        self.value = value
    109        self.token = ""
    110        self.length = ""
    111        self.nuses = ""
    112        self.ndefs = ""
    113        self.format_ = ""
    114 
    115        self.operands_array = []
    116        self.stack_uses_array = []
    117        self.stack_defs_array = []
    118 
    119        self.desc = comment_info.desc
    120        self.category_name = comment_info.category_name
    121        self.type_name = comment_info.type_name
    122        self.operands = comment_info.operands
    123        self.operands_array = comment_info.operands_array
    124        self.stack_uses = comment_info.stack_uses
    125        self.stack_uses_array = comment_info.stack_uses_array
    126        self.stack_defs = comment_info.stack_defs
    127        self.stack_defs_array = comment_info.stack_defs_array
    128 
    129        # List of OpcodeInfo that corresponds to macros after this.
    130        #   /*
    131        #    * comment
    132        #    */
    133        #   MACRO(Sub, ...)
    134        #   MACRO(Mul, ...)
    135        #   MACRO(Div, ...)
    136        self.group = []
    137 
    138        self.sort_key = ""
    139 
    140 
    141 def find_by_name(list, name):
    142    for n, body in list:
    143        if n == name:
    144            return body
    145 
    146    return None
    147 
    148 
    149 def add_to_index(index, opcode):
    150    types = find_by_name(index, opcode.category_name)
    151    if types is None:
    152        raise Exception(f"Category is not listed in index: {opcode.category_name}")
    153    opcodes = find_by_name(types, opcode.type_name)
    154    if opcodes is None:
    155        if opcode.type_name:
    156            raise Exception(
    157                f"Type is not listed in {opcode.category_name}: {opcode.type_name}"
    158            )
    159        types.append((opcode.type_name, [opcode]))
    160        return
    161 
    162    opcodes.append(opcode)
    163 
    164 
    165 tag_pat = re.compile(r"^\s*[A-Za-z]+:\s*|\s*$")
    166 
    167 
    168 def get_tag_value(line):
    169    return re.sub(tag_pat, "", line)
    170 
    171 
# Snake-case opcode names that need a trailing underscore: get_opcodes()
# appends "_" to the expected snake-case name of an opcode when that name
# appears in this set.
RUST_OR_CPP_KEYWORDS = {
    "and",
    "case",
    "default",
    "double",
    "false",
    "goto",
    "in",
    "new",
    "not",
    "or",
    "return",
    "throw",
    "true",
    "try",
    "typeof",
    "void",
}
    190 
    191 
    192 def get_opcodes(dir):
    193    iter_pat = re.compile(
    194        r"/\*(.*?)\*/"  # either a documentation comment...
    195        r"|"
    196        r"MACRO\("  # or a MACRO(...) call
    197        r"(?P<op>[^,]+),\s*"
    198        r"(?P<op_snake>[^,]+),\s*"
    199        r"(?P<token>[^,]+,)\s*"
    200        r"(?P<length>[0-9\-]+),\s*"
    201        r"(?P<nuses>[0-9\-]+),\s*"
    202        r"(?P<ndefs>[0-9\-]+),\s*"
    203        r"(?P<format>[^\)]+)"
    204        r"\)",
    205        re.S,
    206    )
    207    stack_pat = re.compile(r"^(?P<uses>.*?)" r"\s*=>\s*" r"(?P<defs>.*?)$")
    208 
    209    opcodes = dict()
    210    index = []
    211 
    212    with open(f"{dir}/js/src/vm/Opcodes.h", encoding="utf-8") as f:
    213        data = f.read()
    214 
    215    comment_info = None
    216    opcode = None
    217 
    218    # The first opcode after the comment.
    219    group_head = None
    220    next_opcode_value = 0
    221 
    222    for m in re.finditer(iter_pat, data):
    223        comment = m.group(1)
    224        op = m.group("op")
    225 
    226        if comment:
    227            if "[Index]" in comment:
    228                index = parse_index(comment)
    229                continue
    230 
    231            if "Operands:" not in comment:
    232                continue
    233 
    234            group_head = None
    235 
    236            comment_info = CommentInfo()
    237 
    238            state = "desc"
    239            stack = ""
    240            desc = ""
    241 
    242            for line in get_comment_body(comment):
    243                if line.startswith("  Category:"):
    244                    state = "category"
    245                    comment_info.category_name = get_tag_value(line)
    246                elif line.startswith("  Type:"):
    247                    state = "type"
    248                    comment_info.type_name = get_tag_value(line)
    249                elif line.startswith("  Operands:"):
    250                    state = "operands"
    251                    comment_info.operands = get_tag_value(line)
    252                elif line.startswith("  Stack:"):
    253                    state = "stack"
    254                    stack = get_tag_value(line)
    255                elif state == "desc":
    256                    desc += line + "\n"
    257                elif line.startswith("   "):
    258                    if line.isspace():
    259                        pass
    260                    elif state == "operands":
    261                        comment_info.operands += " " + line.strip()
    262                    elif state == "stack":
    263                        stack += " " + line.strip()
    264                else:
    265                    raise ValueError(
    266                        f"unrecognized line in comment: {line!r}\n\nfull comment was:\n{comment}"
    267                    )
    268 
    269            comment_info.desc = desc
    270 
    271            comment_info.operands_array = parse_csv(comment_info.operands)
    272            comment_info.stack_uses_array = parse_csv(comment_info.stack_uses)
    273            comment_info.stack_defs_array = parse_csv(comment_info.stack_defs)
    274 
    275            m2 = stack_pat.search(stack)
    276            if m2:
    277                comment_info.stack_uses = m2.group("uses")
    278                comment_info.stack_defs = m2.group("defs")
    279        else:
    280            assert op is not None
    281            opcode = OpcodeInfo(next_opcode_value, comment_info)
    282            next_opcode_value += 1
    283 
    284            opcode.op = op
    285            opcode.op_snake = m.group("op_snake")
    286            opcode.token = parse_name(m.group("token"))
    287            opcode.length = m.group("length")
    288            opcode.nuses = m.group("nuses")
    289            opcode.ndefs = m.group("ndefs")
    290            opcode.format_ = m.group("format").split("|")
    291 
    292            expected_snake = re.sub(r"(?<!^)(?=[A-Z])", "_", opcode.op).lower()
    293            if expected_snake in RUST_OR_CPP_KEYWORDS:
    294                expected_snake += "_"
    295            if opcode.op_snake != expected_snake:
    296                raise ValueError(
    297                    f"Unexpected snake-case name for {opcode.op}: expected {expected_snake!r}, got {opcode.op_snake!r}"
    298                )
    299 
    300            if not group_head:
    301                group_head = opcode
    302 
    303                opcode.sort_key = opcode.op
    304                if opcode.category_name == "":
    305                    raise Exception(f"Category is not specified for {opcode.op}")
    306                add_to_index(index, opcode)
    307            else:
    308                if group_head.length != opcode.length:
    309                    raise Exception(
    310                        "length should be same for opcodes of the"
    311                        " same group: "
    312                        f"{group_head.length}({group_head.op}) != "
    313                        f"{opcode.length}({opcode.op})"
    314                    )
    315                if group_head.nuses != opcode.nuses:
    316                    raise Exception(
    317                        "nuses should be same for opcodes of the"
    318                        " same group: "
    319                        f"{group_head.nuses}({group_head.op}) != "
    320                        f"{opcode.nuses}({opcode.op})"
    321                    )
    322                if group_head.ndefs != opcode.ndefs:
    323                    raise Exception(
    324                        "ndefs should be same for opcodes of the"
    325                        " same group: "
    326                        f"{group_head.ndefs}({group_head.op}) != "
    327                        f"{opcode.ndefs}({opcode.op})"
    328                    )
    329 
    330                group_head.group.append(opcode)
    331 
    332                if opcode.op < group_head.op:
    333                    group_head.sort_key = opcode.op
    334 
    335            opcodes[op] = opcode
    336 
    337            # Verify stack notation.
    338            nuses = int(opcode.nuses)
    339            ndefs = int(opcode.ndefs)
    340 
    341            stack_nuses = get_stack_count(opcode.stack_uses)
    342            stack_ndefs = get_stack_count(opcode.stack_defs)
    343 
    344            if (nuses != -1 and stack_nuses != -1) and nuses != stack_nuses:
    345                raise Exception(
    346                    f"nuses should match stack notation: {op}: "
    347                    f"{nuses} != {stack_nuses} "
    348                    "(stack_nuses)"
    349                )
    350            if (ndefs != -1 and stack_ndefs != -1) and ndefs != stack_ndefs:
    351                raise Exception(
    352                    f"ndefs should match stack notation: {op}: "
    353                    f"{ndefs} != {stack_ndefs} "
    354                    "(stack_ndefs)"
    355                )
    356 
    357    return index, opcodes